From 7359b3d16f6112b52e8303c44973ef0ad1fc68ea Mon Sep 17 00:00:00 2001 From: Yi-Ting Lee Date: Thu, 7 Jul 2022 17:34:45 -0700 Subject: [PATCH 01/15] feature: query lineage visualizer for general case edge.association_type added style changes of graph --- src/sagemaker/lineage/query.py | 119 +++++++++++++++++++++++++++++---- tests/data/_repack_model.py | 110 ++++++++++++++++++++++++++++++ 2 files changed, 216 insertions(+), 13 deletions(-) create mode 100644 tests/data/_repack_model.py diff --git a/src/sagemaker/lineage/query.py b/src/sagemaker/lineage/query.py index 72bde00a1a..7345911df0 100644 --- a/src/sagemaker/lineage/query.py +++ b/src/sagemaker/lineage/query.py @@ -97,12 +97,12 @@ def __str__(self): Format: { - 'source_arn': 'string', 'destination_arn': 'string', + 'source_arn': 'string', 'destination_arn': 'string', 'association_type': 'string' } - + """ - return (str(self.__dict__)) + return str(self.__dict__) class Vertex: @@ -147,13 +147,13 @@ def __str__(self): Format: { - 'arn': 'string', 'lineage_entity': 'string', - 'lineage_source': 'string', + 'arn': 'string', 'lineage_entity': 'string', + 'lineage_source': 'string', '_session': } - + """ - return (str(self.__dict__)) + return str(self.__dict__) def to_lineage_object(self): """Convert the ``Vertex`` object to its corresponding lineage object. @@ -226,29 +226,122 @@ def __init__( def __str__(self): """Define string representation of ``LineageQueryResult``. - + Format: { 'edges':[ { - 'source_arn': 'string', 'destination_arn': 'string', + 'source_arn': 'string', 'destination_arn': 'string', 'association_type': 'string' }, ... ] 'vertices':[ { - 'arn': 'string', 'lineage_entity': 'string', - 'lineage_source': 'string', + 'arn': 'string', 'lineage_entity': 'string', + 'lineage_source': 'string', '_session': }, ... ] } - + """ result_dict = vars(self) - return (str({k: [vars(val) for val in v] for k, v in result_dict.items()})) + return str({k: [vars(val) for val in v] for k, v in result_dict.items()}) + + def _import_visual_modules(self): + """Import modules needed for visualization.""" + import dash_cytoscape as cyto + + from jupyter_dash import JupyterDash + + from dash import html + + return cyto, JupyterDash, html + + def _get_verts(self): + """Convert vertices to tuple format for visualizer""" + verts = [] + for vert in self.vertices: + verts.append((vert.arn, vert.lineage_source)) + return verts + + def _get_edges(self): + """Convert edges to tuple format for visualizer""" + edges = [] + for edge in self.edges: + edges.append((edge.source_arn, edge.destination_arn, edge.association_type)) + return edges + + def visualize(self): + """Visualize lineage query result.""" + + cyto, JupyterDash, html = self._import_visual_modules() + + cyto.load_extra_layouts() # load "klay" layout (hierarchical layout) from extra layouts + app = JupyterDash(__name__) + + verts = self._get_verts() + edges = self._get_edges() + + nodes = [ + { + "data": {"id": id, "label": label}, + } + for id, label in verts + ] + + edges = [ + { + "data": {"source": source, "target": target, "label": label} + } + for source, target, label in edges + ] + + elements = nodes + edges + + app.layout = html.Div( + [ + cyto.Cytoscape( + id="cytoscape-layout-1", + elements=elements, + style={"width": "100%", "height": "350px"}, + layout={"name": "klay"}, + stylesheet=[ + { + "selector": "node", + "style": { + "label": "data(label)", + "font-size": "3.5vw", + "height": "10vw", + "width": "10vw" + } + }, + { + "selector": "edge", + "style": { + "label": "data(label)", + "color": "gray", + "text-halign": "left", + "text-margin-y": "3px", + "text-margin-x": "-2px", + "font-size": "3%", + "width": "1%", + "curve-style": "taxi", + "target-arrow-color": "gray", + "target-arrow-shape": "triangle", + "line-color": "gray", + "arrow-scale": "0.5" + }, + }, + ], + responsive=True, + ) + ] + ) + + return app.run_server(mode="inline") class LineageFilter(object): diff --git a/tests/data/_repack_model.py b/tests/data/_repack_model.py new file mode 100644 index 0000000000..3cfa6760b3 --- /dev/null +++ b/tests/data/_repack_model.py @@ -0,0 +1,110 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +"""Repack model script for training jobs to inject entry points""" +from __future__ import absolute_import + +import argparse +import os +import shutil +import tarfile +import tempfile + +# Repack Model +# The following script is run via a training job which takes an existing model and a custom +# entry point script as arguments. The script creates a new model archive with the custom +# entry point in the "code" directory along with the existing model. Subsequently, when the model +# is unpacked for inference, the custom entry point will be used. +# Reference: https://docs.aws.amazon.com/sagemaker/latest/dg/amazon-sagemaker-toolkits.html + +# distutils.dir_util.copy_tree works way better than the half-baked +# shutil.copytree which bombs on previously existing target dirs... +# alas ... https://bugs.python.org/issue10948 +# we'll go ahead and use the copy_tree function anyways because this +# repacking is some short-lived hackery, right?? +from distutils.dir_util import copy_tree + + +def repack(inference_script, model_archive, dependencies=None, source_dir=None): # pragma: no cover + """Repack custom dependencies and code into an existing model TAR archive + + Args: + inference_script (str): The path to the custom entry point. + model_archive (str): The name or path (e.g. s3 uri) of the model TAR archive. + dependencies (str): A space-delimited string of paths to custom dependencies. + source_dir (str): The path to a custom source directory. + """ + + # the data directory contains a model archive generated by a previous training job + data_directory = "/opt/ml/input/data/training" + model_path = os.path.join(data_directory, model_archive.split("/")[-1]) + + # create a temporary directory + with tempfile.TemporaryDirectory() as tmp: + local_path = os.path.join(tmp, "local.tar.gz") + # copy the previous training job's model archive to the temporary directory + shutil.copy2(model_path, local_path) + src_dir = os.path.join(tmp, "src") + # create the "code" directory which will contain the inference script + code_dir = os.path.join(src_dir, "code") + os.makedirs(code_dir) + # extract the contents of the previous training job's model archive to the "src" + # directory of this training job + with tarfile.open(name=local_path, mode="r:gz") as tf: + tf.extractall(path=src_dir) + + if source_dir: + # copy /opt/ml/code to code/ + if os.path.exists(code_dir): + shutil.rmtree(code_dir) + shutil.copytree("/opt/ml/code", code_dir) + else: + # copy the custom inference script to code/ + entry_point = os.path.join("/opt/ml/code", inference_script) + shutil.copy2(entry_point, os.path.join(code_dir, inference_script)) + + # copy any dependencies to code/lib/ + if dependencies: + for dependency in dependencies.split(" "): + actual_dependency_path = os.path.join("/opt/ml/code", dependency) + lib_dir = os.path.join(code_dir, "lib") + if not os.path.exists(lib_dir): + os.mkdir(lib_dir) + if os.path.isfile(actual_dependency_path): + shutil.copy2(actual_dependency_path, lib_dir) + else: + if os.path.exists(lib_dir): + shutil.rmtree(lib_dir) + # a directory is in the dependencies. we have to copy + # all of /opt/ml/code into the lib dir because the original directory + # was flattened by the SDK training job upload.. + shutil.copytree("/opt/ml/code", lib_dir) + break + + # copy the "src" dir, which includes the previous training job's model and the + # custom inference script, to the output of this training job + copy_tree(src_dir, "/opt/ml/model") + + +if __name__ == "__main__": # pragma: no cover + parser = argparse.ArgumentParser() + parser.add_argument("--inference_script", type=str, default="inference.py") + parser.add_argument("--dependencies", type=str, default=None) + parser.add_argument("--source_dir", type=str, default=None) + parser.add_argument("--model_archive", type=str, default="model.tar.gz") + args, extra = parser.parse_known_args() + repack( + inference_script=args.inference_script, + dependencies=args.dependencies, + source_dir=args.source_dir, + model_archive=args.model_archive, + ) From d5244167c2d5c0521a8dffcc383972651348a463 Mon Sep 17 00:00:00 2001 From: Yi-Ting Lee Date: Thu, 14 Jul 2022 10:27:31 -0700 Subject: [PATCH 02/15] startarn added to lineageQueryResult --- src/sagemaker/lineage/query.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/sagemaker/lineage/query.py b/src/sagemaker/lineage/query.py index 7345911df0..400cf5ceeb 100644 --- a/src/sagemaker/lineage/query.py +++ b/src/sagemaker/lineage/query.py @@ -15,6 +15,7 @@ from datetime import datetime from enum import Enum +from tracemalloc import start from typing import Optional, Union, List, Dict from sagemaker.lineage._utils import get_resource_name_from_arn @@ -208,6 +209,7 @@ def __init__( self, edges: List[Edge] = None, vertices: List[Vertex] = None, + startarn: List[str] = None, ): """Init for LineageQueryResult. @@ -217,6 +219,7 @@ def __init__( """ self.edges = [] self.vertices = [] + self.startarn = [] if edges is not None: self.edges = edges @@ -224,6 +227,9 @@ def __init__( if vertices is not None: self.vertices = vertices + if startarn is not None: + self.startarn = startarn + def __str__(self): """Define string representation of ``LineageQueryResult``. @@ -248,7 +254,7 @@ def __str__(self): """ result_dict = vars(self) - return str({k: [vars(val) for val in v] for k, v in result_dict.items()}) + return str({k: [str(val) for val in v] for k, v in result_dict.items()}) def _import_visual_modules(self): """Import modules needed for visualization.""" @@ -417,9 +423,8 @@ def _get_vertex(self, vertex): sagemaker_session=self._session, ) - def _convert_api_response(self, response) -> LineageQueryResult: + def _convert_api_response(self, response, converted) -> LineageQueryResult: """Convert the lineage query API response to its Python representation.""" - converted = LineageQueryResult() converted.edges = [self._get_edge(edge) for edge in response["Edges"]] converted.vertices = [self._get_vertex(vertex) for vertex in response["Vertices"]] @@ -502,7 +507,9 @@ def query( Filters=query_filter._to_request_dict() if query_filter else {}, MaxDepth=max_depth, ) - query_response = self._convert_api_response(query_response) + # create query result for startarn info + query_result = LineageQueryResult(startarn=start_arns) + query_response = self._convert_api_response(query_response, query_result) query_response = self._collapse_cross_account_artifacts(query_response) return query_response From acf64f444abe76ea0ae2eab081481d04d2736674 Mon Sep 17 00:00:00 2001 From: Yi-Ting Lee Date: Thu, 14 Jul 2022 11:21:20 -0700 Subject: [PATCH 03/15] color node by lineage entity --- src/sagemaker/lineage/query.py | 44 ++++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/src/sagemaker/lineage/query.py b/src/sagemaker/lineage/query.py index 400cf5ceeb..a6b56ae7a8 100644 --- a/src/sagemaker/lineage/query.py +++ b/src/sagemaker/lineage/query.py @@ -236,18 +236,22 @@ def __str__(self): Format: { 'edges':[ - { + "{ 'source_arn': 'string', 'destination_arn': 'string', 'association_type': 'string' - }, + }", ... - ] + ], 'vertices':[ - { + "{ 'arn': 'string', 'lineage_entity': 'string', 'lineage_source': 'string', '_session': - }, + }", + ... + ], + 'startarn':[ + 'string', ... ] } @@ -270,7 +274,7 @@ def _get_verts(self): """Convert vertices to tuple format for visualizer""" verts = [] for vert in self.vertices: - verts.append((vert.arn, vert.lineage_source)) + verts.append((vert.arn, vert.lineage_source, vert.lineage_entity)) return verts def _get_edges(self): @@ -288,14 +292,16 @@ def visualize(self): cyto.load_extra_layouts() # load "klay" layout (hierarchical layout) from extra layouts app = JupyterDash(__name__) + # get vertices and edges info for graph verts = self._get_verts() edges = self._get_edges() nodes = [ { "data": {"id": id, "label": label}, + "classes": classes } - for id, label in verts + for id, label, classes in verts ] edges = [ @@ -341,6 +347,30 @@ def visualize(self): "arrow-scale": "0.5" }, }, + { + "selector": ".Artifact", + "style": { + "background-color": "#146eb4" + } + }, + { + "selector": ".Context", + "style": { + "background-color": "#ff9900" + } + }, + { + "selector": ".TrialComponent", + "style": { + "background-color": "#f6cf61" + } + }, + { + "selector": ".Action", + "style": { + "background-color": "#88c396" + } + } ], responsive=True, ) From e07e83c774fad640761501080f6d50a0264f282c Mon Sep 17 00:00:00 2001 From: Yi-Ting Lee Date: Thu, 14 Jul 2022 11:43:33 -0700 Subject: [PATCH 04/15] identify startarn node by shape --- src/sagemaker/lineage/query.py | 62 +++++++++++----------------------- 1 file changed, 19 insertions(+), 43 deletions(-) diff --git a/src/sagemaker/lineage/query.py b/src/sagemaker/lineage/query.py index a6b56ae7a8..030dc866c0 100644 --- a/src/sagemaker/lineage/query.py +++ b/src/sagemaker/lineage/query.py @@ -15,7 +15,6 @@ from datetime import datetime from enum import Enum -from tracemalloc import start from typing import Optional, Union, List, Dict from sagemaker.lineage._utils import get_resource_name_from_arn @@ -271,14 +270,17 @@ def _import_visual_modules(self): return cyto, JupyterDash, html def _get_verts(self): - """Convert vertices to tuple format for visualizer""" + """Convert vertices to tuple format for visualizer.""" verts = [] for vert in self.vertices: - verts.append((vert.arn, vert.lineage_source, vert.lineage_entity)) + if vert.arn in self.startarn: + verts.append((vert.arn, vert.lineage_source, vert.lineage_entity + " startarn")) + else: + verts.append((vert.arn, vert.lineage_source, vert.lineage_entity)) return verts def _get_edges(self): - """Convert edges to tuple format for visualizer""" + """Convert edges to tuple format for visualizer.""" edges = [] for edge in self.edges: edges.append((edge.source_arn, edge.destination_arn, edge.association_type)) @@ -286,7 +288,6 @@ def _get_edges(self): def visualize(self): """Visualize lineage query result.""" - cyto, JupyterDash, html = self._import_visual_modules() cyto.load_extra_layouts() # load "klay" layout (hierarchical layout) from extra layouts @@ -297,17 +298,11 @@ def visualize(self): edges = self._get_edges() nodes = [ - { - "data": {"id": id, "label": label}, - "classes": classes - } - for id, label, classes in verts + {"data": {"id": id, "label": label}, "classes": classes} for id, label, classes in verts ] edges = [ - { - "data": {"source": source, "target": target, "label": label} - } + {"data": {"source": source, "target": target, "label": label}} for source, target, label in edges ] @@ -322,13 +317,13 @@ def visualize(self): layout={"name": "klay"}, stylesheet=[ { - "selector": "node", + "selector": "node", "style": { - "label": "data(label)", - "font-size": "3.5vw", + "label": "data(label)", + "font-size": "3.5vw", "height": "10vw", - "width": "10vw" - } + "width": "10vw", + }, }, { "selector": "edge", @@ -344,33 +339,14 @@ def visualize(self): "target-arrow-color": "gray", "target-arrow-shape": "triangle", "line-color": "gray", - "arrow-scale": "0.5" + "arrow-scale": "0.5", }, }, - { - "selector": ".Artifact", - "style": { - "background-color": "#146eb4" - } - }, - { - "selector": ".Context", - "style": { - "background-color": "#ff9900" - } - }, - { - "selector": ".TrialComponent", - "style": { - "background-color": "#f6cf61" - } - }, - { - "selector": ".Action", - "style": { - "background-color": "#88c396" - } - } + {"selector": ".Artifact", "style": {"background-color": "#146eb4"}}, + {"selector": ".Context", "style": {"background-color": "#ff9900"}}, + {"selector": ".TrialComponent", "style": {"background-color": "#f6cf61"}}, + {"selector": ".Action", "style": {"background-color": "#88c396"}}, + {"selector": ".startarn", "style": {"shape": "star"}}, ], responsive=True, ) From 687d6b64439ed20388946da67e30206975f4dbc9 Mon Sep 17 00:00:00 2001 From: Yi-Ting Lee Date: Thu, 14 Jul 2022 13:15:52 -0700 Subject: [PATCH 05/15] Add code comments --- src/sagemaker/lineage/query.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/sagemaker/lineage/query.py b/src/sagemaker/lineage/query.py index 030dc866c0..83dedb5515 100644 --- a/src/sagemaker/lineage/query.py +++ b/src/sagemaker/lineage/query.py @@ -272,8 +272,10 @@ def _import_visual_modules(self): def _get_verts(self): """Convert vertices to tuple format for visualizer.""" verts = [] + # get vertex info in the form of (id, label, class) for vert in self.vertices: if vert.arn in self.startarn: + # add "startarn" class to node if arn is a startarn verts.append((vert.arn, vert.lineage_source, vert.lineage_entity + " startarn")) else: verts.append((vert.arn, vert.lineage_source, vert.lineage_entity)) @@ -282,6 +284,7 @@ def _get_verts(self): def _get_edges(self): """Convert edges to tuple format for visualizer.""" edges = [] + # get edge info in the form of (source, target, label) for edge in self.edges: edges.append((edge.source_arn, edge.destination_arn, edge.association_type)) return edges From e3a4c9de08a3e56bcdbc983a651befd495a943e5 Mon Sep 17 00:00:00 2001 From: Yi-Ting Lee Date: Fri, 15 Jul 2022 13:35:28 -0700 Subject: [PATCH 06/15] Double sided arrows handled --- src/sagemaker/lineage/query.py | 37 ++++++++--- tests/data/_repack_model.py | 110 --------------------------------- 2 files changed, 29 insertions(+), 118 deletions(-) delete mode 100644 tests/data/_repack_model.py diff --git a/src/sagemaker/lineage/query.py b/src/sagemaker/lineage/query.py index 83dedb5515..5361458290 100644 --- a/src/sagemaker/lineage/query.py +++ b/src/sagemaker/lineage/query.py @@ -267,7 +267,9 @@ def _import_visual_modules(self): from dash import html - return cyto, JupyterDash, html + from dash.dependencies import Input, Output + + return cyto, JupyterDash, html, Input, Output def _get_verts(self): """Convert vertices to tuple format for visualizer.""" @@ -287,11 +289,12 @@ def _get_edges(self): # get edge info in the form of (source, target, label) for edge in self.edges: edges.append((edge.source_arn, edge.destination_arn, edge.association_type)) + edges.append((self.edges[1].destination_arn, self.edges[1].source_arn, self.edges[1].association_type)) return edges def visualize(self): """Visualize lineage query result.""" - cyto, JupyterDash, html = self._import_visual_modules() + cyto, JupyterDash, html, Input, Output = self._import_visual_modules() cyto.load_extra_layouts() # load "klay" layout (hierarchical layout) from extra layouts app = JupyterDash(__name__) @@ -314,7 +317,7 @@ def visualize(self): app.layout = html.Div( [ cyto.Cytoscape( - id="cytoscape-layout-1", + id="cytoscape-graph", elements=elements, style={"width": "100%", "height": "350px"}, layout={"name": "klay"}, @@ -326,6 +329,9 @@ def visualize(self): "font-size": "3.5vw", "height": "10vw", "width": "10vw", + "border-width": "0.8", + "border-opacity": "0", + "border-color": "#232f3e" }, }, { @@ -334,11 +340,13 @@ def visualize(self): "label": "data(label)", "color": "gray", "text-halign": "left", - "text-margin-y": "3px", - "text-margin-x": "-2px", - "font-size": "3%", - "width": "1%", - "curve-style": "taxi", + "text-margin-y": "2.5", + "font-size": "3", + "width": "1", + "curve-style": "bezier", + "control-point-step-size": "15", + # "taxi-direction": "rightward", + # "taxi-turn": "50%", "target-arrow-color": "gray", "target-arrow-shape": "triangle", "line-color": "gray", @@ -350,12 +358,25 @@ def visualize(self): {"selector": ".TrialComponent", "style": {"background-color": "#f6cf61"}}, {"selector": ".Action", "style": {"background-color": "#88c396"}}, {"selector": ".startarn", "style": {"shape": "star"}}, + {"selector": ".select", "style": { "border-opacity": "0.7"}}, ], responsive=True, ) ] ) + @app.callback(Output("cytoscape-graph", "elements"), + Input("cytoscape-graph", "tapNodeData")) + def selectNode(data): + for n in nodes: + if data != None and n["data"]["id"] == data["id"]: + n["classes"] += " select" + else: + n["classes"] = n["classes"].replace("select", "") + + elements = nodes + edges + return elements + return app.run_server(mode="inline") diff --git a/tests/data/_repack_model.py b/tests/data/_repack_model.py deleted file mode 100644 index 3cfa6760b3..0000000000 --- a/tests/data/_repack_model.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -"""Repack model script for training jobs to inject entry points""" -from __future__ import absolute_import - -import argparse -import os -import shutil -import tarfile -import tempfile - -# Repack Model -# The following script is run via a training job which takes an existing model and a custom -# entry point script as arguments. The script creates a new model archive with the custom -# entry point in the "code" directory along with the existing model. Subsequently, when the model -# is unpacked for inference, the custom entry point will be used. -# Reference: https://docs.aws.amazon.com/sagemaker/latest/dg/amazon-sagemaker-toolkits.html - -# distutils.dir_util.copy_tree works way better than the half-baked -# shutil.copytree which bombs on previously existing target dirs... -# alas ... https://bugs.python.org/issue10948 -# we'll go ahead and use the copy_tree function anyways because this -# repacking is some short-lived hackery, right?? -from distutils.dir_util import copy_tree - - -def repack(inference_script, model_archive, dependencies=None, source_dir=None): # pragma: no cover - """Repack custom dependencies and code into an existing model TAR archive - - Args: - inference_script (str): The path to the custom entry point. - model_archive (str): The name or path (e.g. s3 uri) of the model TAR archive. - dependencies (str): A space-delimited string of paths to custom dependencies. - source_dir (str): The path to a custom source directory. - """ - - # the data directory contains a model archive generated by a previous training job - data_directory = "/opt/ml/input/data/training" - model_path = os.path.join(data_directory, model_archive.split("/")[-1]) - - # create a temporary directory - with tempfile.TemporaryDirectory() as tmp: - local_path = os.path.join(tmp, "local.tar.gz") - # copy the previous training job's model archive to the temporary directory - shutil.copy2(model_path, local_path) - src_dir = os.path.join(tmp, "src") - # create the "code" directory which will contain the inference script - code_dir = os.path.join(src_dir, "code") - os.makedirs(code_dir) - # extract the contents of the previous training job's model archive to the "src" - # directory of this training job - with tarfile.open(name=local_path, mode="r:gz") as tf: - tf.extractall(path=src_dir) - - if source_dir: - # copy /opt/ml/code to code/ - if os.path.exists(code_dir): - shutil.rmtree(code_dir) - shutil.copytree("/opt/ml/code", code_dir) - else: - # copy the custom inference script to code/ - entry_point = os.path.join("/opt/ml/code", inference_script) - shutil.copy2(entry_point, os.path.join(code_dir, inference_script)) - - # copy any dependencies to code/lib/ - if dependencies: - for dependency in dependencies.split(" "): - actual_dependency_path = os.path.join("/opt/ml/code", dependency) - lib_dir = os.path.join(code_dir, "lib") - if not os.path.exists(lib_dir): - os.mkdir(lib_dir) - if os.path.isfile(actual_dependency_path): - shutil.copy2(actual_dependency_path, lib_dir) - else: - if os.path.exists(lib_dir): - shutil.rmtree(lib_dir) - # a directory is in the dependencies. we have to copy - # all of /opt/ml/code into the lib dir because the original directory - # was flattened by the SDK training job upload.. - shutil.copytree("/opt/ml/code", lib_dir) - break - - # copy the "src" dir, which includes the previous training job's model and the - # custom inference script, to the output of this training job - copy_tree(src_dir, "/opt/ml/model") - - -if __name__ == "__main__": # pragma: no cover - parser = argparse.ArgumentParser() - parser.add_argument("--inference_script", type=str, default="inference.py") - parser.add_argument("--dependencies", type=str, default=None) - parser.add_argument("--source_dir", type=str, default=None) - parser.add_argument("--model_archive", type=str, default="model.tar.gz") - args, extra = parser.parse_known_args() - repack( - inference_script=args.inference_script, - dependencies=args.dependencies, - source_dir=args.source_dir, - model_archive=args.model_archive, - ) From 1808f8799518cab34257ca89e8e1955649229903 Mon Sep 17 00:00:00 2001 From: Yi-Ting Lee Date: Mon, 18 Jul 2022 09:50:25 -0700 Subject: [PATCH 07/15] legend added --- src/sagemaker/lineage/query.py | 103 +++++++++++++++++++++++++++++++-- 1 file changed, 99 insertions(+), 4 deletions(-) diff --git a/src/sagemaker/lineage/query.py b/src/sagemaker/lineage/query.py index 5361458290..6f526ce367 100644 --- a/src/sagemaker/lineage/query.py +++ b/src/sagemaker/lineage/query.py @@ -289,7 +289,6 @@ def _get_edges(self): # get edge info in the form of (source, target, label) for edge in self.edges: edges.append((edge.source_arn, edge.destination_arn, edge.association_type)) - edges.append((self.edges[1].destination_arn, self.edges[1].source_arn, self.edges[1].association_type)) return edges def visualize(self): @@ -319,7 +318,7 @@ def visualize(self): cyto.Cytoscape( id="cytoscape-graph", elements=elements, - style={"width": "100%", "height": "350px"}, + style={"width": "85%", "height": "350px", 'display': 'inline-block', 'border-width': '1vw', "border-color": "#232f3e"}, layout={"name": "klay"}, stylesheet=[ { @@ -331,7 +330,8 @@ def visualize(self): "width": "10vw", "border-width": "0.8", "border-opacity": "0", - "border-color": "#232f3e" + "border-color": "#232f3e", + "font-family": "verdana" }, }, { @@ -351,6 +351,7 @@ def visualize(self): "target-arrow-shape": "triangle", "line-color": "gray", "arrow-scale": "0.5", + "font-family": "verdana" }, }, {"selector": ".Artifact", "style": {"background-color": "#146eb4"}}, @@ -361,7 +362,101 @@ def visualize(self): {"selector": ".select", "style": { "border-opacity": "0.7"}}, ], responsive=True, - ) + ), + html.Div([ + html.Div([ + html.Div( + style={ + 'background-color': "#f6cf61", + 'width': '1.5vw', + 'height': '1.5vw', + 'display': 'inline-block' + } + ), + html.Div( + style={ + 'width': '0.5vw', + 'height': '1.5vw', + 'display': 'inline-block' + } + ), + html.Div(' Trial Component', style={'display': 'inline-block', "font-size": "1.5vw"}), + ]), + html.Div([ + html.Div( + style={ + 'background-color': "#ff9900", + 'width': '1.5vw', + 'height': '1.5vw', + 'display': 'inline-block' + } + ), + html.Div( + style={ + 'width': '0.5vw', + 'height': '1.5vw', + 'display': 'inline-block' + } + ), + html.Div(' Context', style={'display': 'inline-block', "font-size": "1.5vw"}), + ]), + html.Div([ + html.Div( + style={ + 'background-color': "#88c396", + 'width': '1.5vw', + 'height': '1.5vw', + 'display': 'inline-block' + } + ), + html.Div( + style={ + 'width': '0.5vw', + 'height': '1.5vw', + 'display': 'inline-block' + } + ), + html.Div(' Action', style={'display': 'inline-block', "font-size": "1.5vw"}), + ]), + html.Div([ + html.Div( + style={ + 'background-color': "#146eb4", + 'width': '1.5vw', + 'height': '1.5vw', + 'display': 'inline-block' + } + ), + html.Div( + style={ + 'width': '0.5vw', + 'height': '1.5vw', + 'display': 'inline-block' + } + ), + html.Div(' Artifact', style={'display': 'inline-block', "font-size": "1.5vw"}), + ]), + html.Div([ + html.Div( + "★", + style={ + 'background-color': "white", + 'width': '1.5vw', + 'height': '1.5vw', + 'display': 'inline-block', + "font-size": "1.5vw" + } + ), + html.Div( + style={ + 'width': '0.5vw', + 'height': '1.5vw', + 'display': 'inline-block' + } + ), + html.Div('StartArn', style={'display': 'inline-block', "font-size": "1.5vw"}), + ]), + ], style={'width': '15%', 'display': 'inline-block', "font-size": "1vw", "font-family": "verdana", "vertical-align": "top"}) ] ) From 60904d5dbe6c77873d610ed7580de0bb0cd805bf Mon Sep 17 00:00:00 2001 From: Yi-Ting Lee Date: Mon, 18 Jul 2022 17:07:54 -0700 Subject: [PATCH 08/15] try except raise --- src/sagemaker/lineage/query.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/sagemaker/lineage/query.py b/src/sagemaker/lineage/query.py index a198d1ebf5..3f04800a50 100644 --- a/src/sagemaker/lineage/query.py +++ b/src/sagemaker/lineage/query.py @@ -215,19 +215,22 @@ def _import_visual_modules(self): import dash_cytoscape as cyto except ImportError as e: print(e) - print("try pip install dash-cytoscape") + print("Try: pip install dash-cytoscape") + raise try: from jupyter_dash import JupyterDash except ImportError as e: print(e) - print("try pip install jupyter-dash") + print("Try: pip install jupyter-dash") + raise try: from dash import html except ImportError as e: print(e) - print("try pip install dash") + print("Try: pip install dash") + raise return cyto, JupyterDash, html From 9125eb930e930375bb27d45d22d0f9765db24e08 Mon Sep 17 00:00:00 2001 From: Yi-Ting Lee Date: Tue, 19 Jul 2022 10:34:02 -0700 Subject: [PATCH 09/15] startarn added --- src/sagemaker/lineage/query.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/sagemaker/lineage/query.py b/src/sagemaker/lineage/query.py index 3f04800a50..86fd805d81 100644 --- a/src/sagemaker/lineage/query.py +++ b/src/sagemaker/lineage/query.py @@ -15,6 +15,7 @@ from datetime import datetime from enum import Enum +from tracemalloc import start from typing import Optional, Union, List, Dict from sagemaker.lineage._utils import get_resource_name_from_arn @@ -295,6 +296,7 @@ def __init__( self, edges: List[Edge] = None, vertices: List[Vertex] = None, + startarn: List[str] = None, ): """Init for LineageQueryResult. @@ -304,6 +306,7 @@ def __init__( """ self.edges = [] self.vertices = [] + self.startarn = [] if edges is not None: self.edges = edges @@ -311,6 +314,9 @@ def __init__( if vertices is not None: self.vertices = vertices + if startarn is not None: + self.startarn = startarn + def __str__(self): """Define string representation of ``LineageQueryResult``. @@ -335,7 +341,7 @@ def __str__(self): """ result_dict = vars(self) - return str({k: [vars(val) for val in v] for k, v in result_dict.items()}) + return str({k: [str(val) for val in v] for k, v in result_dict.items()}) def _covert_vertices_to_tuples(self): """Convert vertices to tuple format for visualizer.""" @@ -456,9 +462,8 @@ def _get_vertex(self, vertex): sagemaker_session=self._session, ) - def _convert_api_response(self, response) -> LineageQueryResult: + def _convert_api_response(self, response, converted) -> LineageQueryResult: """Convert the lineage query API response to its Python representation.""" - converted = LineageQueryResult() converted.edges = [self._get_edge(edge) for edge in response["Edges"]] converted.vertices = [self._get_vertex(vertex) for vertex in response["Vertices"]] @@ -541,7 +546,9 @@ def query( Filters=query_filter._to_request_dict() if query_filter else {}, MaxDepth=max_depth, ) - query_response = self._convert_api_response(query_response) + # create query result for startarn info + query_result = LineageQueryResult(startarn=start_arns) + query_response = self._convert_api_response(query_response, query_result) query_response = self._collapse_cross_account_artifacts(query_response) return query_response From 3db7fa17ac311ca76a583f5e897736085a229f65 Mon Sep 17 00:00:00 2001 From: Yi-Ting Lee Date: Tue, 19 Jul 2022 11:57:40 -0700 Subject: [PATCH 10/15] add get element function --- src/sagemaker/lineage/query.py | 50 ++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/src/sagemaker/lineage/query.py b/src/sagemaker/lineage/query.py index 6f526ce367..4fb62bad37 100644 --- a/src/sagemaker/lineage/query.py +++ b/src/sagemaker/lineage/query.py @@ -235,18 +235,18 @@ def __str__(self): Format: { 'edges':[ - "{ + { 'source_arn': 'string', 'destination_arn': 'string', 'association_type': 'string' - }", + }, ... ], 'vertices':[ - "{ + { 'arn': 'string', 'lineage_entity': 'string', 'lineage_source': 'string', '_session': - }", + }, ... ], 'startarn':[ @@ -271,7 +271,7 @@ def _import_visual_modules(self): return cyto, JupyterDash, html, Input, Output - def _get_verts(self): + def _covert_vertices_to_tuples(self): """Convert vertices to tuple format for visualizer.""" verts = [] # get vertex info in the form of (id, label, class) @@ -283,7 +283,7 @@ def _get_verts(self): verts.append((vert.arn, vert.lineage_source, vert.lineage_entity)) return verts - def _get_edges(self): + def _covert_edges_to_tuples(self): """Convert edges to tuple format for visualizer.""" edges = [] # get edge info in the form of (source, target, label) @@ -291,16 +291,11 @@ def _get_edges(self): edges.append((edge.source_arn, edge.destination_arn, edge.association_type)) return edges - def visualize(self): - """Visualize lineage query result.""" - cyto, JupyterDash, html, Input, Output = self._import_visual_modules() - - cyto.load_extra_layouts() # load "klay" layout (hierarchical layout) from extra layouts - app = JupyterDash(__name__) - + def _get_visualization_elements(self): + """Get elements for visualization.""" # get vertices and edges info for graph - verts = self._get_verts() - edges = self._get_edges() + verts = self._covert_vertices_to_tuples() + edges = self._covert_edges_to_tuples() nodes = [ {"data": {"id": id, "label": label}, "classes": classes} for id, label, classes in verts @@ -313,6 +308,17 @@ def visualize(self): elements = nodes + edges + return elements + + def visualize(self): + """Visualize lineage query result.""" + cyto, JupyterDash, html, Input, Output = self._import_visual_modules() + + cyto.load_extra_layouts() # load "klay" layout (hierarchical layout) from extra layouts + app = JupyterDash(__name__) + + elements = self._get_visualization_elements() + app.layout = html.Div( [ cyto.Cytoscape( @@ -461,15 +467,17 @@ def visualize(self): ) @app.callback(Output("cytoscape-graph", "elements"), - Input("cytoscape-graph", "tapNodeData")) - def selectNode(data): - for n in nodes: - if data != None and n["data"]["id"] == data["id"]: + Input("cytoscape-graph", "tapNodeData"), + Input("cytoscape-graph", "elements")) + def selectNode(tapData, elements): + for n in elements: + if tapData != None and n["data"]["id"] == tapData["id"]: + # if is tapped node, add "select" class to node n["classes"] += " select" - else: + elif "classes" in n: + # remove "select" class in "classes" if node not selected n["classes"] = n["classes"].replace("select", "") - elements = nodes + edges return elements return app.run_server(mode="inline") From c135ab957c996a665cae8e2818ad79f7516580b0 Mon Sep 17 00:00:00 2001 From: Yi-Ting Lee Date: Tue, 19 Jul 2022 12:19:41 -0700 Subject: [PATCH 11/15] add DashVisualizer class --- src/sagemaker/lineage/query.py | 313 +++++++++++++++++++-------------- 1 file changed, 177 insertions(+), 136 deletions(-) diff --git a/src/sagemaker/lineage/query.py b/src/sagemaker/lineage/query.py index 4fb62bad37..8201f0647b 100644 --- a/src/sagemaker/lineage/query.py +++ b/src/sagemaker/lineage/query.py @@ -201,127 +201,55 @@ def _artifact_to_lineage_object(self): return Artifact.load(artifact_arn=self.arn, sagemaker_session=self._session) -class LineageQueryResult(object): - """A wrapper around the results of a lineage query.""" - - def __init__( - self, - edges: List[Edge] = None, - vertices: List[Vertex] = None, - startarn: List[str] = None, - ): - """Init for LineageQueryResult. - - Args: - edges (List[Edge]): The edges of the query result. - vertices (List[Vertex]): The vertices of the query result. - """ - self.edges = [] - self.vertices = [] - self.startarn = [] - - if edges is not None: - self.edges = edges - - if vertices is not None: - self.vertices = vertices - - if startarn is not None: - self.startarn = startarn - - def __str__(self): - """Define string representation of ``LineageQueryResult``. - - Format: - { - 'edges':[ - { - 'source_arn': 'string', 'destination_arn': 'string', - 'association_type': 'string' - }, - ... - ], - 'vertices':[ - { - 'arn': 'string', 'lineage_entity': 'string', - 'lineage_source': 'string', - '_session': - }, - ... - ], - 'startarn':[ - 'string', - ... - ] - } +class DashVisualizer(object): + """Create object used for visualizing graph using Dash library.""" - """ - result_dict = vars(self) - return str({k: [str(val) for val in v] for k, v in result_dict.items()}) + def __init__(self): + """Init for DashVisualizer.""" + # import visualization packages + self.cyto, self.JupyterDash, self.html, self.Input, self.Output = self._import_visual_modules() def _import_visual_modules(self): """Import modules needed for visualization.""" - import dash_cytoscape as cyto + try: + import dash_cytoscape as cyto + except ImportError as e: + print(e) + print("Try: pip install dash-cytoscape") + raise + + try: + from jupyter_dash import JupyterDash + except ImportError as e: + print(e) + print("Try: pip install jupyter-dash") + raise + + try: + from dash import html + except ImportError as e: + print(e) + print("Try: pip install dash") + raise + + try: + from dash.dependencies import Input, Output + except ImportError as e: + print(e) + print("Try: pip install dash") + raise - from jupyter_dash import JupyterDash - - from dash import html - - from dash.dependencies import Input, Output return cyto, JupyterDash, html, Input, Output - def _covert_vertices_to_tuples(self): - """Convert vertices to tuple format for visualizer.""" - verts = [] - # get vertex info in the form of (id, label, class) - for vert in self.vertices: - if vert.arn in self.startarn: - # add "startarn" class to node if arn is a startarn - verts.append((vert.arn, vert.lineage_source, vert.lineage_entity + " startarn")) - else: - verts.append((vert.arn, vert.lineage_source, vert.lineage_entity)) - return verts - - def _covert_edges_to_tuples(self): - """Convert edges to tuple format for visualizer.""" - edges = [] - # get edge info in the form of (source, target, label) - for edge in self.edges: - edges.append((edge.source_arn, edge.destination_arn, edge.association_type)) - return edges - - def _get_visualization_elements(self): - """Get elements for visualization.""" - # get vertices and edges info for graph - verts = self._covert_vertices_to_tuples() - edges = self._covert_edges_to_tuples() - - nodes = [ - {"data": {"id": id, "label": label}, "classes": classes} for id, label, classes in verts - ] - - edges = [ - {"data": {"source": source, "target": target, "label": label}} - for source, target, label in edges - ] - - elements = nodes + edges + def _get_app(self, elements): + """Create JupyterDash app for interactivity on Jupyter notebook.""" + app = self.JupyterDash(__name__) + self.cyto.load_extra_layouts() - return elements - - def visualize(self): - """Visualize lineage query result.""" - cyto, JupyterDash, html, Input, Output = self._import_visual_modules() - - cyto.load_extra_layouts() # load "klay" layout (hierarchical layout) from extra layouts - app = JupyterDash(__name__) - - elements = self._get_visualization_elements() - - app.layout = html.Div( + app.layout = self.html.Div( [ - cyto.Cytoscape( + self.cyto.Cytoscape( id="cytoscape-graph", elements=elements, style={"width": "85%", "height": "350px", 'display': 'inline-block', 'border-width': '1vw', "border-color": "#232f3e"}, @@ -369,9 +297,9 @@ def visualize(self): ], responsive=True, ), - html.Div([ - html.Div([ - html.Div( + self.html.Div([ + self.html.Div([ + self.html.Div( style={ 'background-color': "#f6cf61", 'width': '1.5vw', @@ -379,17 +307,17 @@ def visualize(self): 'display': 'inline-block' } ), - html.Div( + self.html.Div( style={ 'width': '0.5vw', 'height': '1.5vw', 'display': 'inline-block' } ), - html.Div(' Trial Component', style={'display': 'inline-block', "font-size": "1.5vw"}), + self.html.Div(' Trial Component', style={'display': 'inline-block', "font-size": "1.5vw"}), ]), - html.Div([ - html.Div( + self.html.Div([ + self.html.Div( style={ 'background-color': "#ff9900", 'width': '1.5vw', @@ -397,17 +325,17 @@ def visualize(self): 'display': 'inline-block' } ), - html.Div( + self.html.Div( style={ 'width': '0.5vw', 'height': '1.5vw', 'display': 'inline-block' } ), - html.Div(' Context', style={'display': 'inline-block', "font-size": "1.5vw"}), + self.html.Div(' Context', style={'display': 'inline-block', "font-size": "1.5vw"}), ]), - html.Div([ - html.Div( + self.html.Div([ + self.html.Div( style={ 'background-color': "#88c396", 'width': '1.5vw', @@ -415,17 +343,17 @@ def visualize(self): 'display': 'inline-block' } ), - html.Div( + self.html.Div( style={ 'width': '0.5vw', 'height': '1.5vw', 'display': 'inline-block' } ), - html.Div(' Action', style={'display': 'inline-block', "font-size": "1.5vw"}), + self.html.Div(' Action', style={'display': 'inline-block', "font-size": "1.5vw"}), ]), - html.Div([ - html.Div( + self.html.Div([ + self.html.Div( style={ 'background-color': "#146eb4", 'width': '1.5vw', @@ -433,17 +361,17 @@ def visualize(self): 'display': 'inline-block' } ), - html.Div( + self.html.Div( style={ 'width': '0.5vw', 'height': '1.5vw', 'display': 'inline-block' } ), - html.Div(' Artifact', style={'display': 'inline-block', "font-size": "1.5vw"}), + self.html.Div(' Artifact', style={'display': 'inline-block', "font-size": "1.5vw"}), ]), - html.Div([ - html.Div( + self.html.Div([ + self.html.Div( "★", style={ 'background-color': "white", @@ -453,22 +381,22 @@ def visualize(self): "font-size": "1.5vw" } ), - html.Div( + self.html.Div( style={ 'width': '0.5vw', 'height': '1.5vw', 'display': 'inline-block' } ), - html.Div('StartArn', style={'display': 'inline-block', "font-size": "1.5vw"}), + self.html.Div('StartArn', style={'display': 'inline-block', "font-size": "1.5vw"}), ]), ], style={'width': '15%', 'display': 'inline-block', "font-size": "1vw", "font-family": "verdana", "vertical-align": "top"}) ] ) - @app.callback(Output("cytoscape-graph", "elements"), - Input("cytoscape-graph", "tapNodeData"), - Input("cytoscape-graph", "elements")) + @app.callback(self.Output("cytoscape-graph", "elements"), + self.Input("cytoscape-graph", "tapNodeData"), + self.Input("cytoscape-graph", "elements")) def selectNode(tapData, elements): for n in elements: if tapData != None and n["data"]["id"] == tapData["id"]: @@ -480,8 +408,121 @@ def selectNode(tapData, elements): return elements - return app.run_server(mode="inline") + return app + + def render(self, elements, mode): + """Render graph for lineage query result.""" + app = self._get_app(elements) + + return app.run_server(mode=mode) + +class LineageQueryResult(object): + """A wrapper around the results of a lineage query.""" + + def __init__( + self, + edges: List[Edge] = None, + vertices: List[Vertex] = None, + startarn: List[str] = None, + ): + """Init for LineageQueryResult. + + Args: + edges (List[Edge]): The edges of the query result. + vertices (List[Vertex]): The vertices of the query result. + """ + self.edges = [] + self.vertices = [] + self.startarn = [] + + if edges is not None: + self.edges = edges + + if vertices is not None: + self.vertices = vertices + + if startarn is not None: + self.startarn = startarn + + def __str__(self): + """Define string representation of ``LineageQueryResult``. + + Format: + { + 'edges':[ + { + 'source_arn': 'string', 'destination_arn': 'string', + 'association_type': 'string' + }, + ... + ], + 'vertices':[ + { + 'arn': 'string', 'lineage_entity': 'string', + 'lineage_source': 'string', + '_session': + }, + ... + ], + 'startarn':[ + 'string', + ... + ] + } + + """ + result_dict = vars(self) + return str({k: [str(val) for val in v] for k, v in result_dict.items()}) + + def _covert_vertices_to_tuples(self): + """Convert vertices to tuple format for visualizer.""" + verts = [] + # get vertex info in the form of (id, label, class) + for vert in self.vertices: + if vert.arn in self.startarn: + # add "startarn" class to node if arn is a startarn + verts.append((vert.arn, vert.lineage_source, vert.lineage_entity + " startarn")) + else: + verts.append((vert.arn, vert.lineage_source, vert.lineage_entity)) + return verts + + def _covert_edges_to_tuples(self): + """Convert edges to tuple format for visualizer.""" + edges = [] + # get edge info in the form of (source, target, label) + for edge in self.edges: + edges.append((edge.source_arn, edge.destination_arn, edge.association_type)) + return edges + + def _get_visualization_elements(self): + """Get elements for visualization.""" + # get vertices and edges info for graph + verts = self._covert_vertices_to_tuples() + edges = self._covert_edges_to_tuples() + + nodes = [ + {"data": {"id": id, "label": label}, "classes": classes} for id, label, classes in verts + ] + + edges = [ + {"data": {"source": source, "target": target, "label": label}} + for source, target, label in edges + ] + + elements = nodes + edges + + return elements + + def visualize(self): + """Visualize lineage query result.""" + elements = self._get_visualization_elements() + + # initialize DashVisualizer instance to render graph & interactive components + dash_vis = DashVisualizer() + + dash_server = dash_vis.render(elements=elements, mode="inline") + return dash_server class LineageFilter(object): """A filter used in a lineage query.""" From daa5cc438a9677e07705dcc8b63760b0f598991d Mon Sep 17 00:00:00 2001 From: Yi-Ting Lee Date: Tue, 19 Jul 2022 12:29:07 -0700 Subject: [PATCH 12/15] style check --- src/sagemaker/lineage/query.py | 251 +++++++++++++++++++-------------- 1 file changed, 149 insertions(+), 102 deletions(-) diff --git a/src/sagemaker/lineage/query.py b/src/sagemaker/lineage/query.py index 8201f0647b..d51b7c685d 100644 --- a/src/sagemaker/lineage/query.py +++ b/src/sagemaker/lineage/query.py @@ -207,7 +207,13 @@ class DashVisualizer(object): def __init__(self): """Init for DashVisualizer.""" # import visualization packages - self.cyto, self.JupyterDash, self.html, self.Input, self.Output = self._import_visual_modules() + ( + self.cyto, + self.JupyterDash, + self.html, + self.Input, + self.Output, + ) = self._import_visual_modules() def _import_visual_modules(self): """Import modules needed for visualization.""" @@ -239,7 +245,6 @@ def _import_visual_modules(self): print("Try: pip install dash") raise - return cyto, JupyterDash, html, Input, Output def _get_app(self, elements): @@ -252,7 +257,13 @@ def _get_app(self, elements): self.cyto.Cytoscape( id="cytoscape-graph", elements=elements, - style={"width": "85%", "height": "350px", 'display': 'inline-block', 'border-width': '1vw', "border-color": "#232f3e"}, + style={ + "width": "85%", + "height": "350px", + "display": "inline-block", + "border-width": "1vw", + "border-color": "#232f3e", + }, layout={"name": "klay"}, stylesheet=[ { @@ -263,9 +274,9 @@ def _get_app(self, elements): "height": "10vw", "width": "10vw", "border-width": "0.8", - "border-opacity": "0", + "border-opacity": "0", "border-color": "#232f3e", - "font-family": "verdana" + "font-family": "verdana", }, }, { @@ -279,13 +290,11 @@ def _get_app(self, elements): "width": "1", "curve-style": "bezier", "control-point-step-size": "15", - # "taxi-direction": "rightward", - # "taxi-turn": "50%", "target-arrow-color": "gray", "target-arrow-shape": "triangle", "line-color": "gray", "arrow-scale": "0.5", - "font-family": "verdana" + "font-family": "verdana", }, }, {"selector": ".Artifact", "style": {"background-color": "#146eb4"}}, @@ -293,113 +302,149 @@ def _get_app(self, elements): {"selector": ".TrialComponent", "style": {"background-color": "#f6cf61"}}, {"selector": ".Action", "style": {"background-color": "#88c396"}}, {"selector": ".startarn", "style": {"shape": "star"}}, - {"selector": ".select", "style": { "border-opacity": "0.7"}}, + {"selector": ".select", "style": {"border-opacity": "0.7"}}, ], responsive=True, ), - self.html.Div([ - self.html.Div([ - self.html.Div( - style={ - 'background-color': "#f6cf61", - 'width': '1.5vw', - 'height': '1.5vw', - 'display': 'inline-block' - } - ), - self.html.Div( - style={ - 'width': '0.5vw', - 'height': '1.5vw', - 'display': 'inline-block' - } - ), - self.html.Div(' Trial Component', style={'display': 'inline-block', "font-size": "1.5vw"}), - ]), - self.html.Div([ - self.html.Div( - style={ - 'background-color': "#ff9900", - 'width': '1.5vw', - 'height': '1.5vw', - 'display': 'inline-block' - } - ), - self.html.Div( - style={ - 'width': '0.5vw', - 'height': '1.5vw', - 'display': 'inline-block' - } - ), - self.html.Div(' Context', style={'display': 'inline-block', "font-size": "1.5vw"}), - ]), - self.html.Div([ - self.html.Div( - style={ - 'background-color': "#88c396", - 'width': '1.5vw', - 'height': '1.5vw', - 'display': 'inline-block' - } - ), + self.html.Div( + [ self.html.Div( - style={ - 'width': '0.5vw', - 'height': '1.5vw', - 'display': 'inline-block' - } + [ + self.html.Div( + style={ + "background-color": "#f6cf61", + "width": "1.5vw", + "height": "1.5vw", + "display": "inline-block", + } + ), + self.html.Div( + style={ + "width": "0.5vw", + "height": "1.5vw", + "display": "inline-block", + } + ), + self.html.Div( + " Trial Component", + style={"display": "inline-block", "font-size": "1.5vw"}, + ), + ] ), - self.html.Div(' Action', style={'display': 'inline-block', "font-size": "1.5vw"}), - ]), - self.html.Div([ self.html.Div( - style={ - 'background-color': "#146eb4", - 'width': '1.5vw', - 'height': '1.5vw', - 'display': 'inline-block' - } + [ + self.html.Div( + style={ + "background-color": "#ff9900", + "width": "1.5vw", + "height": "1.5vw", + "display": "inline-block", + } + ), + self.html.Div( + style={ + "width": "0.5vw", + "height": "1.5vw", + "display": "inline-block", + } + ), + self.html.Div( + " Context", + style={"display": "inline-block", "font-size": "1.5vw"}, + ), + ] ), self.html.Div( - style={ - 'width': '0.5vw', - 'height': '1.5vw', - 'display': 'inline-block' - } + [ + self.html.Div( + style={ + "background-color": "#88c396", + "width": "1.5vw", + "height": "1.5vw", + "display": "inline-block", + } + ), + self.html.Div( + style={ + "width": "0.5vw", + "height": "1.5vw", + "display": "inline-block", + } + ), + self.html.Div( + " Action", + style={"display": "inline-block", "font-size": "1.5vw"}, + ), + ] ), - self.html.Div(' Artifact', style={'display': 'inline-block', "font-size": "1.5vw"}), - ]), - self.html.Div([ self.html.Div( - "★", - style={ - 'background-color': "white", - 'width': '1.5vw', - 'height': '1.5vw', - 'display': 'inline-block', - "font-size": "1.5vw" - } + [ + self.html.Div( + style={ + "background-color": "#146eb4", + "width": "1.5vw", + "height": "1.5vw", + "display": "inline-block", + } + ), + self.html.Div( + style={ + "width": "0.5vw", + "height": "1.5vw", + "display": "inline-block", + } + ), + self.html.Div( + " Artifact", + style={"display": "inline-block", "font-size": "1.5vw"}, + ), + ] ), self.html.Div( - style={ - 'width': '0.5vw', - 'height': '1.5vw', - 'display': 'inline-block' - } + [ + self.html.Div( + "★", + style={ + "background-color": "white", + "width": "1.5vw", + "height": "1.5vw", + "display": "inline-block", + "font-size": "1.5vw", + }, + ), + self.html.Div( + style={ + "width": "0.5vw", + "height": "1.5vw", + "display": "inline-block", + } + ), + self.html.Div( + "StartArn", + style={"display": "inline-block", "font-size": "1.5vw"}, + ), + ] ), - self.html.Div('StartArn', style={'display': 'inline-block', "font-size": "1.5vw"}), - ]), - ], style={'width': '15%', 'display': 'inline-block', "font-size": "1vw", "font-family": "verdana", "vertical-align": "top"}) + ], + style={ + "width": "15%", + "display": "inline-block", + "font-size": "1vw", + "font-family": "verdana", + "vertical-align": "top", + }, + ), ] ) - @app.callback(self.Output("cytoscape-graph", "elements"), - self.Input("cytoscape-graph", "tapNodeData"), - self.Input("cytoscape-graph", "elements")) + @app.callback( + self.Output("cytoscape-graph", "elements"), + self.Input("cytoscape-graph", "tapNodeData"), + self.Input("cytoscape-graph", "elements"), + ) def selectNode(tapData, elements): for n in elements: - if tapData != None and n["data"]["id"] == tapData["id"]: + if tapData is not None and n["data"]["id"] == tapData["id"]: # if is tapped node, add "select" class to node n["classes"] += " select" elif "classes" in n: @@ -416,6 +461,7 @@ def render(self, elements, mode): return app.run_server(mode=mode) + class LineageQueryResult(object): """A wrapper around the results of a lineage query.""" @@ -450,18 +496,18 @@ def __str__(self): Format: { 'edges':[ - { + "{ 'source_arn': 'string', 'destination_arn': 'string', 'association_type': 'string' - }, + }", ... ], 'vertices':[ - { + "{ 'arn': 'string', 'lineage_entity': 'string', 'lineage_source': 'string', '_session': - }, + }", ... ], 'startarn':[ @@ -515,7 +561,7 @@ def _get_visualization_elements(self): def visualize(self): """Visualize lineage query result.""" - elements = self._get_visualization_elements() + elements = self._get_visualization_elements() # initialize DashVisualizer instance to render graph & interactive components dash_vis = DashVisualizer() @@ -524,6 +570,7 @@ def visualize(self): return dash_server + class LineageFilter(object): """A filter used in a lineage query.""" From 28a9eeb75efa77121e889053c62a5d21662ff86c Mon Sep 17 00:00:00 2001 From: Yi-Ting Lee Date: Tue, 19 Jul 2022 14:23:29 -0700 Subject: [PATCH 13/15] feature: query lineage visualizer advanced styling & interactive component handle --- src/sagemaker/lineage/query.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/sagemaker/lineage/query.py b/src/sagemaker/lineage/query.py index d51b7c685d..8087e4720f 100644 --- a/src/sagemaker/lineage/query.py +++ b/src/sagemaker/lineage/query.py @@ -258,7 +258,7 @@ def _get_app(self, elements): id="cytoscape-graph", elements=elements, style={ - "width": "85%", + "width": "84%", "height": "350px", "display": "inline-block", "border-width": "1vw", @@ -306,6 +306,15 @@ def _get_app(self, elements): ], responsive=True, ), + self.html.Div( + style={ + "width": "0.5%", + "display": "inline-block", + "font-size": "1vw", + "font-family": "verdana", + "vertical-align": "top", + }, + ), self.html.Div( [ self.html.Div( @@ -427,7 +436,6 @@ def _get_app(self, elements): ), ], style={ - "width": "15%", "display": "inline-block", "font-size": "1vw", "font-family": "verdana", From 7c2b0c314b042838e4da482d172f34c7350d831b Mon Sep 17 00:00:00 2001 From: Yi-Ting Lee Date: Wed, 20 Jul 2022 16:41:15 -0700 Subject: [PATCH 14/15] add functions that generate html components and style selectors --- src/sagemaker/lineage/query.py | 169 +++++++++------------------------ 1 file changed, 45 insertions(+), 124 deletions(-) diff --git a/src/sagemaker/lineage/query.py b/src/sagemaker/lineage/query.py index 8087e4720f..8e1ec3000e 100644 --- a/src/sagemaker/lineage/query.py +++ b/src/sagemaker/lineage/query.py @@ -215,6 +215,13 @@ def __init__(self): self.Output, ) = self._import_visual_modules() + self.entity_color = { + "TrialComponent": "#f6cf61", + "Context": "#ff9900", + "Action": "#88c396", + "Artifact": "#146eb4", + } + def _import_visual_modules(self): """Import modules needed for visualization.""" try: @@ -247,6 +254,38 @@ def _import_visual_modules(self): return cyto, JupyterDash, html, Input, Output + def _create_legend_component(self, text, color, colorText=""): + """Create legend component div.""" + return self.html.Div( + [ + self.html.Div( + colorText, + style={ + "background-color": color, + "width": "1.5vw", + "height": "1.5vw", + "display": "inline-block", + "font-size": "1.5vw", + }, + ), + self.html.Div( + style={ + "width": "0.5vw", + "height": "1.5vw", + "display": "inline-block", + } + ), + self.html.Div( + text, + style={"display": "inline-block", "font-size": "1.5vw"}, + ), + ] + ) + + def _create_entity_selector(self, entity_name, color): + """Create selector for each lineage entity.""" + return {"selector": "." + entity_name, "style": {"background-color": color}} + def _get_app(self, elements): """Create JupyterDash app for interactivity on Jupyter notebook.""" app = self.JupyterDash(__name__) @@ -254,6 +293,7 @@ def _get_app(self, elements): app.layout = self.html.Div( [ + # graph section self.cyto.Cytoscape( id="cytoscape-graph", elements=elements, @@ -297,13 +337,10 @@ def _get_app(self, elements): "font-family": "verdana", }, }, - {"selector": ".Artifact", "style": {"background-color": "#146eb4"}}, - {"selector": ".Context", "style": {"background-color": "#ff9900"}}, - {"selector": ".TrialComponent", "style": {"background-color": "#f6cf61"}}, - {"selector": ".Action", "style": {"background-color": "#88c396"}}, {"selector": ".startarn", "style": {"shape": "star"}}, {"selector": ".select", "style": {"border-opacity": "0.7"}}, - ], + ] + + [self._create_entity_selector(k, v) for k, v in self.entity_color.items()], responsive=True, ), self.html.Div( @@ -315,126 +352,10 @@ def _get_app(self, elements): "vertical-align": "top", }, ), + # legend section self.html.Div( - [ - self.html.Div( - [ - self.html.Div( - style={ - "background-color": "#f6cf61", - "width": "1.5vw", - "height": "1.5vw", - "display": "inline-block", - } - ), - self.html.Div( - style={ - "width": "0.5vw", - "height": "1.5vw", - "display": "inline-block", - } - ), - self.html.Div( - " Trial Component", - style={"display": "inline-block", "font-size": "1.5vw"}, - ), - ] - ), - self.html.Div( - [ - self.html.Div( - style={ - "background-color": "#ff9900", - "width": "1.5vw", - "height": "1.5vw", - "display": "inline-block", - } - ), - self.html.Div( - style={ - "width": "0.5vw", - "height": "1.5vw", - "display": "inline-block", - } - ), - self.html.Div( - " Context", - style={"display": "inline-block", "font-size": "1.5vw"}, - ), - ] - ), - self.html.Div( - [ - self.html.Div( - style={ - "background-color": "#88c396", - "width": "1.5vw", - "height": "1.5vw", - "display": "inline-block", - } - ), - self.html.Div( - style={ - "width": "0.5vw", - "height": "1.5vw", - "display": "inline-block", - } - ), - self.html.Div( - " Action", - style={"display": "inline-block", "font-size": "1.5vw"}, - ), - ] - ), - self.html.Div( - [ - self.html.Div( - style={ - "background-color": "#146eb4", - "width": "1.5vw", - "height": "1.5vw", - "display": "inline-block", - } - ), - self.html.Div( - style={ - "width": "0.5vw", - "height": "1.5vw", - "display": "inline-block", - } - ), - self.html.Div( - " Artifact", - style={"display": "inline-block", "font-size": "1.5vw"}, - ), - ] - ), - self.html.Div( - [ - self.html.Div( - "★", - style={ - "background-color": "white", - "width": "1.5vw", - "height": "1.5vw", - "display": "inline-block", - "font-size": "1.5vw", - }, - ), - self.html.Div( - style={ - "width": "0.5vw", - "height": "1.5vw", - "display": "inline-block", - } - ), - self.html.Div( - "StartArn", - style={"display": "inline-block", "font-size": "1.5vw"}, - ), - ] - ), - ], + [self._create_legend_component(k, v) for k, v in self.entity_color.items()] + + [self._create_legend_component("StartArn", "#ffffff", "★")], style={ "display": "inline-block", "font-size": "1vw", From 41d2453d9c3bd3496204d2747c325206a3772ff8 Mon Sep 17 00:00:00 2001 From: Yi-Ting Lee Date: Thu, 21 Jul 2022 11:24:34 -0700 Subject: [PATCH 15/15] inject graph data to DashVisualizer task --- src/sagemaker/lineage/query.py | 62 +++++++++++++++++++++++++--------- 1 file changed, 46 insertions(+), 16 deletions(-) diff --git a/src/sagemaker/lineage/query.py b/src/sagemaker/lineage/query.py index 8e1ec3000e..1ea7baecb6 100644 --- a/src/sagemaker/lineage/query.py +++ b/src/sagemaker/lineage/query.py @@ -204,7 +204,7 @@ def _artifact_to_lineage_object(self): class DashVisualizer(object): """Create object used for visualizing graph using Dash library.""" - def __init__(self): + def __init__(self, graph_styles): """Init for DashVisualizer.""" # import visualization packages ( @@ -215,12 +215,7 @@ def __init__(self): self.Output, ) = self._import_visual_modules() - self.entity_color = { - "TrialComponent": "#f6cf61", - "Context": "#ff9900", - "Action": "#88c396", - "Artifact": "#146eb4", - } + self.graph_styles = graph_styles def _import_visual_modules(self): """Import modules needed for visualization.""" @@ -254,12 +249,19 @@ def _import_visual_modules(self): return cyto, JupyterDash, html, Input, Output - def _create_legend_component(self, text, color, colorText=""): + def _create_legend_component(self, style): """Create legend component div.""" + text = style["name"] + symbol = "" + color = "#ffffff" + if style["isShape"] == "False": + color = style["style"]["background-color"] + else: + symbol = style["symbol"] return self.html.Div( [ self.html.Div( - colorText, + symbol, style={ "background-color": color, "width": "1.5vw", @@ -282,9 +284,9 @@ def _create_legend_component(self, text, color, colorText=""): ] ) - def _create_entity_selector(self, entity_name, color): + def _create_entity_selector(self, entity_name, style): """Create selector for each lineage entity.""" - return {"selector": "." + entity_name, "style": {"background-color": color}} + return {"selector": "." + entity_name, "style": style["style"]} def _get_app(self, elements): """Create JupyterDash app for interactivity on Jupyter notebook.""" @@ -337,10 +339,9 @@ def _get_app(self, elements): "font-family": "verdana", }, }, - {"selector": ".startarn", "style": {"shape": "star"}}, {"selector": ".select", "style": {"border-opacity": "0.7"}}, ] - + [self._create_entity_selector(k, v) for k, v in self.entity_color.items()], + + [self._create_entity_selector(k, v) for k, v in self.graph_styles.items()], responsive=True, ), self.html.Div( @@ -354,8 +355,7 @@ def _get_app(self, elements): ), # legend section self.html.Div( - [self._create_legend_component(k, v) for k, v in self.entity_color.items()] - + [self._create_legend_component("StartArn", "#ffffff", "★")], + [self._create_legend_component(v) for k, v in self.graph_styles.items()], style={ "display": "inline-block", "font-size": "1vw", @@ -492,8 +492,38 @@ def visualize(self): """Visualize lineage query result.""" elements = self._get_visualization_elements() + lineage_graph = { + # nodes can have shape / color + "TrialComponent": { + "name": "Trial Component", + "style": {"background-color": "#f6cf61"}, + "isShape": "False", + }, + "Context": { + "name": "Context", + "style": {"background-color": "#ff9900"}, + "isShape": "False", + }, + "Action": { + "name": "Action", + "style": {"background-color": "#88c396"}, + "isShape": "False", + }, + "Artifact": { + "name": "Artifact", + "style": {"background-color": "#146eb4"}, + "isShape": "False", + }, + "StartArn": { + "name": "StartArn", + "style": {"shape": "star"}, + "isShape": "True", + "symbol": "★", # shape symbol for legend + }, + } + # initialize DashVisualizer instance to render graph & interactive components - dash_vis = DashVisualizer() + dash_vis = DashVisualizer(lineage_graph) dash_server = dash_vis.render(elements=elements, mode="inline")