diff --git a/src/sagemaker/amtviz/__init__.py b/src/sagemaker/amtviz/__init__.py new file mode 100644 index 0000000000..2b05e7b0cf --- /dev/null +++ b/src/sagemaker/amtviz/__init__.py @@ -0,0 +1,17 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +"""Placeholder docstring""" +from __future__ import absolute_import + +from sagemaker.amtviz.visualization import visualize_tuning_job +__all__ = ['visualize_tuning_job'] diff --git a/src/sagemaker/amtviz/job_metrics.py b/src/sagemaker/amtviz/job_metrics.py new file mode 100644 index 0000000000..f84457f9da --- /dev/null +++ b/src/sagemaker/amtviz/job_metrics.py @@ -0,0 +1,186 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +"""Helper functions to retrieve job metrics from CloudWatch.""" +from __future__ import absolute_import + +from datetime import datetime, timedelta +from typing import Callable, List, Optional, Tuple, Dict, Any +import hashlib +import os +from pathlib import Path + +import logging +import pandas as pd +import numpy as np +import boto3 + +logger = logging.getLogger(__name__) + +cw = boto3.client("cloudwatch") +sm = boto3.client("sagemaker") + + +def disk_cache(outer: Callable) -> Callable: + """A decorator that implements disk-based caching for CloudWatch metrics data. + + This decorator caches the output of the wrapped function to disk in JSON Lines format. + It creates a cache key using MD5 hash of the function arguments and stores the data + in the user's home directory under .amtviz/cw_metrics_cache/. + + Args: + outer (Callable): The function to be wrapped. Must return a pandas DataFrame + containing CloudWatch metrics data. + + Returns: + Callable: A wrapper function that implements the caching logic. 
+ """ + + def inner(*args: Any, **kwargs: Any) -> pd.DataFrame: + key_input = str(args) + str(kwargs) + # nosec b303 - Not used for cryptography, but to create lookup key + key = hashlib.md5(key_input.encode("utf-8")).hexdigest() + cache_dir = Path.home().joinpath(".amtviz/cw_metrics_cache") + fn = f"{cache_dir}/req_{key}.jsonl.gz" + if Path(fn).exists(): + try: + df = pd.read_json(fn, lines=True) + logger.debug("H", end="") + df["ts"] = pd.to_datetime(df["ts"]) + df["ts"] = df["ts"].dt.tz_localize(None) + # pyright: ignore [reportIndexIssue, reportOptionalSubscript] + df["rel_ts"] = pd.to_datetime(df["rel_ts"]) + df["rel_ts"] = df["rel_ts"].dt.tz_localize(None) + return df + except KeyError: + # Empty file leads to empty df, hence no df['ts'] possible + pass + # nosec b110 - doesn't matter why we could not load it. + except BaseException as e: + logger.error("\nException: %s - %s", type(e), e) + + logger.debug("M", end="") + df = outer(*args, **kwargs) + assert isinstance(df, pd.DataFrame), "Only caching Pandas DataFrames." + + os.makedirs(cache_dir, exist_ok=True) + df.to_json(fn, orient="records", date_format="iso", lines=True) + + return df + + return inner + + +def _metric_data_query_tpl(metric_name: str, dim_name: str, dim_value: str) -> Dict[str, Any]: + """Returns a CloudWatch metric data query template.""" + return { + "Id": metric_name.lower().replace(":", "_").replace("-", "_"), + "MetricStat": { + "Stat": "Average", + "Metric": { + "Namespace": "/aws/sagemaker/TrainingJobs", + "MetricName": metric_name, + "Dimensions": [ + {"Name": dim_name, "Value": dim_value}, + ], + }, + "Period": 60, + }, + "ReturnData": True, + } + + +def _get_metric_data( + queries: List[Dict[str, Any]], + start_time: datetime, + end_time: datetime +) -> pd.DataFrame: + """Fetches CloudWatch metrics between timestamps and returns a DataFrame with selected columns.""" + start_time = start_time - timedelta(hours=1) + end_time = end_time + timedelta(hours=1) + response = cw.get_metric_data(MetricDataQueries=queries, StartTime=start_time, EndTime=end_time) + + df = pd.DataFrame() + if "MetricDataResults" not in response: + return df + + for metric_data in response["MetricDataResults"]: + values = metric_data["Values"] + ts = np.array(metric_data["Timestamps"], dtype=np.datetime64) + labels = [metric_data["Label"]] * len(values) + + df = pd.concat([df, pd.DataFrame({"value": values, "ts": ts, "label": labels})]) + + # We now calculate the relative time based on the first actual observed + # time stamps, not the potentially start time that we used to scope our CW + # API call. The difference could be for example startup times or waiting + # for Spot. 
+ if not df.empty: + df["rel_ts"] = datetime.fromtimestamp(1) + (df["ts"] - df["ts"].min()) # pyright: ignore + return df + + +@disk_cache +def _collect_metrics( + dimensions: List[Tuple[str, str]], + start_time: datetime, + end_time: Optional[datetime] +) -> pd.DataFrame: + """Collects SageMaker training job metrics from CloudWatch based on given dimensions and time range.""" + df = pd.DataFrame() + for dim_name, dim_value in dimensions: + response = cw.list_metrics( + Namespace="/aws/sagemaker/TrainingJobs", + Dimensions=[ + {"Name": dim_name, "Value": dim_value}, + ], + ) + if not response["Metrics"]: + continue + metric_names = [metric["MetricName"] for metric in response["Metrics"]] + if not metric_names: + # No metric data yet, or not any longer, because the data were aged out + continue + metric_data_queries = [ + _metric_data_query_tpl(metric_name, dim_name, dim_value) for metric_name in metric_names + ] + df = pd.concat([df, _get_metric_data(metric_data_queries, start_time, end_time)]) + + return df + + +def get_cw_job_metrics( + job_name: str, + start_time: Optional[datetime] = None, + end_time: Optional[datetime] = None +) -> pd.DataFrame: + """Retrieves CloudWatch metrics for a SageMaker training job. + + Args: + job_name (str): Name of the SageMaker training job. + start_time (datetime, optional): Start time for metrics collection. + Defaults to now - 4 hours. + end_time (datetime, optional): End time for metrics collection. + Defaults to start_time + 4 hours. + + Returns: + pd.DataFrame: Metrics data with columns for value, timestamp, and metric name. + Results are cached to disk for improved performance. + """ + dimensions = [ + ("TrainingJobName", job_name), + ("Host", job_name + "/algo-1"), + ] + # If not given, use reasonable defaults for start and end time + start_time = start_time or datetime.now() - timedelta(hours=4) + end_time = end_time or start_time + timedelta(hours=4) + return _collect_metrics(dimensions, start_time, end_time) diff --git a/src/sagemaker/amtviz/visualization.py b/src/sagemaker/amtviz/visualization.py new file mode 100644 index 0000000000..bb66195771 --- /dev/null +++ b/src/sagemaker/amtviz/visualization.py @@ -0,0 +1,850 @@ +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +""" +This module provides visualization capabilities for SageMaker hyperparameter tuning jobs. + +It contains utilities to create interactive visualizations of hyperparameter tuning results +using Altair charts. 
The module enables users to analyze and understand the performance +of their hyperparameter optimization experiments through various visual representations +including: +- Progress of objective metrics over time +- Distribution of results +- Relationship between hyperparameters and objective values +- Training job metrics and instance utilization +- Comparative analysis across multiple tuning jobs + +Main Features: + - Visualize single or multiple hyperparameter tuning jobs + - Display training job metrics from CloudWatch + - Support for both completed and in-progress tuning jobs + - Interactive filtering and highlighting of data points + - CPU, memory, and GPU utilization visualization + - Advanced visualization options for detailed analysis + +Primary Classes and Functions: + - visualize_tuning_job: Main function to create visualizations for tuning jobs + - create_charts: Core chart creation functionality + - get_job_analytics_data: Retrieves and processes tuning job data + +Dependencies: + - altair: For creating interactive visualizations + - pandas: For data manipulation and analysis + - boto3: For AWS service interaction + - sagemaker: For accessing SageMaker resources +""" +from __future__ import absolute_import + +from typing import Union, List, Optional, Tuple +import os +import warnings +import logging +import altair as alt +import pandas as pd +import numpy as np +import boto3 +import sagemaker +from sagemaker.amtviz.job_metrics import get_cw_job_metrics + +warnings.filterwarnings("ignore") +logger = logging.getLogger(__name__) + +pd.set_option("display.max_rows", 500) +pd.set_option("display.max_columns", 500) +pd.set_option("display.width", 1000) +pd.set_option("display.max_colwidth", None) # Don't truncate TrainingJobName + + +alt.data_transformers.disable_max_rows() +altair_renderer = os.getenv("ALTAIR_RENDERER", "default") +logger.info("Setting altair renderer to %s.", altair_renderer) +alt.renderers.enable(altair_renderer) + + +sm = boto3.client("sagemaker") + + +def _columnize(charts: List[alt.Chart], cols: int = 2) -> alt.VConcatChart: + """Arrange charts in columns.""" + return alt.vconcat(*[alt.hconcat(*charts[i : i + cols]) for i in range(0, len(charts), cols)]) + + +def visualize_tuning_job( + tuning_jobs: Union[str, List[str], "sagemaker.tuner.HyperparameterTuner"], + return_dfs: bool = False, + job_metrics: Optional[List[str]] = None, + trials_only: bool = False, + advanced: bool = False, +) -> Union[alt.Chart, Tuple[alt.Chart, pd.DataFrame, pd.DataFrame]]: + """ + Visualize SageMaker hyperparameter tuning jobs. 
+ + Args: + tuning_jobs: Single tuning job or list of tuning jobs (name or HyperparameterTuner object) + return_dfs: Whether to return the underlying DataFrames + job_metrics: List of additional job metrics to include + trials_only: Whether to only show trials data + advanced: Whether to show advanced visualizations + + Returns: + If return_dfs is False, returns Altair chart + If return_dfs is True, returns tuple of (chart, trials_df, full_df) + """ + + trials_df, tuned_parameters, objective_name, is_minimize = get_job_analytics_data(tuning_jobs) + + try: + from IPython import get_ipython, display + if get_ipython(): + # Running in a Jupyter Notebook + display(trials_df.head(10)) + else: + # Running in a non-Jupyter environment + logger.info(trials_df.head(10).to_string()) + except ImportError: + # Not running in a Jupyter Notebook + logger.info(trials_df.head(10).to_string()) + + full_df = ( + _prepare_consolidated_df(trials_df) if not trials_only else pd.DataFrame() + ) + + trials_df.columns = trials_df.columns.map(_clean_parameter_name) + full_df.columns = full_df.columns.map(_clean_parameter_name) + tuned_parameters = [_clean_parameter_name(tp) for tp in tuned_parameters] + objective_name = _clean_parameter_name(objective_name) + + charts = create_charts( + trials_df, + tuned_parameters, + full_df, + objective_name, + minimize_objective=is_minimize, + job_metrics=job_metrics, + advanced=advanced, + ) + + if return_dfs: + return charts, trials_df, full_df + return charts + + +def create_charts( + trials_df: pd.DataFrame, + tuning_parameters: List[str], + full_df: pd.DataFrame, + objective_name: str, + minimize_objective: bool, + job_metrics: Optional[List[str]] = None, + highlight_trials: bool = True, + color_trials: bool = False, + advanced: bool = False, +) -> alt.Chart: + """ + Create visualization charts for hyperparameter tuning results. 
+ + Args: + trials_df: DataFrame containing trials data + tuning_parameters: List of hyperparameter names + full_df: DataFrame with consolidated data + objective_name: Name of the objective metric + minimize_objective: Whether objective should be minimized + job_metrics: Additional job metrics to include + highlight_trials: Whether to highlight selected trials + color_trials: Whether to color trials by job + advanced: Whether to show advanced visualizations + + Returns: + Altair chart visualization + """ + + if trials_df.empty: + logger.info("No results available yet.") + return pd.DataFrame() + + if job_metrics is None: + job_metrics = [] + + multiple_tuning_jobs = len(trials_df["TuningJobName"].unique()) > 1 + multiple_job_status = len(trials_df["TrainingJobStatus"].unique()) > 1 + + # Rows, n>1 + # Detail Charts + + brush = alt.selection_interval(encodings=["x"], resolve="intersect", empty=True) + + job_highlight_selection = alt.selection_point( + on="mouseover", + nearest=False, + empty=False, + fields=["TrainingJobName", "TrainingStartTime"], + ) + + # create tooltip + detail_tooltip = [] + for trp in [objective_name] + tuning_parameters: + if trials_df[trp].dtype == np.float64: + trp = alt.Tooltip(trp, format=".2e") + detail_tooltip.append(trp) + + detail_tooltip.append(alt.Tooltip("TrainingStartTime:T", format="%H:%M:%S")) + detail_tooltip.extend(["TrainingJobName", "TrainingJobStatus", "TrainingElapsedTimeSeconds"]) + + # create stroke/stroke-width for tuning_jobs + # and color for training jobs, if wanted + # add coloring of the stroke to highlight correlated + # data points + jobs_props = {"shape": alt.Shape("TrainingJobStatus:N", legend=None)} + + if multiple_tuning_jobs: + jobs_props["strokeWidth"] = alt.StrokeWidthValue(2.0) + jobs_props["stroke"] = alt.Stroke("TuningJobName:N", legend=None) + + if color_trials: + jobs_props["color"] = alt.Color("TrainingJobName:N") + + if highlight_trials: + jobs_props["strokeWidth"] = alt.condition( + job_highlight_selection, + alt.StrokeWidthValue(2.0), + alt.StrokeWidthValue(2.0), + ) + jobs_props["stroke"] = alt.condition( + job_highlight_selection, + alt.StrokeValue("gold"), + alt.Stroke("TuningJobName:N", legend=None) + if multiple_tuning_jobs + else alt.StrokeValue("white"), + ) + + opacity = alt.condition(brush, alt.value(1.0), alt.value(0.35)) + charts = [] + + # Min and max of the objective. This is used in filtered + # charts, so that the filtering does not make the axis + # jump, which would make comparisons harder. + objective_scale = alt.Scale( + domain=( + trials_df[objective_name].min(), + trials_df[objective_name].max(), + ) + ) + + # If we have multiple tuning jobs, we also want to be able + # to discriminate based on the individual tuning job, so + # we just treat them as an additional tuning parameter + tuning_parameters = tuning_parameters.copy() + (["TuningJobName"] if multiple_tuning_jobs else []) + + # If we use early stopping and at least some jobs were + # stopped early, we want to be able to discriminate + # those jobs. + if multiple_job_status: + tuning_parameters.append("TrainingJobStatus") + + def render_detail_charts(): + # To force a tuning job to sample a combination more than once, we + # sometimes introduce a hyperparameter that has no effect. + # It's values are random and without impact, so we omit it from analysis. 
+ ignored_parameters = {"dummy"} + for tuning_parameter in tuning_parameters: + if tuning_parameter in ignored_parameters: + continue + + # Map dataframe's dtype to altair's types and + # adjust scale if necessary + scale_type = "linear" + scale_log_base = 10 + + few_values = len(trials_df[tuning_parameter].unique()) < 8 + parameter_type = "N" # Nominal + dtype = str(trials_df.dtypes[tuning_parameter]) + if "float" in dtype: + parameter_type = "Q" # Quantitative + ratio = (trials_df[tuning_parameter].max() + 1e-10) / ( + trials_df[tuning_parameter].min() + 1e-10 + ) + not_likely_discrete = ( + len(trials_df[tuning_parameter].unique()) > trials_df[tuning_parameter].count() + ) # edge case when both are equal + if few_values and not_likely_discrete: + if ratio > 50: + scale_type = "log" + elif ratio > 10: + scale_type = "log" + scale_log_base = 2 + + elif "int" in dtype or "object" in dtype: + parameter_type = "O" # Ordinal + + x_encoding = alt.X( + f"{tuning_parameter}:{parameter_type}", + scale=alt.Scale( + zero=False, + padding=1, + type=scale_type, + base=scale_log_base, + ), + ) + + # Sync the coloring for categorical hyperparameters + discrete = parameter_type in ["O", "N"] and few_values + + # Detail Chart + charts.append( + alt.Chart(trials_df) + .add_params(brush) + .add_params(job_highlight_selection) + .mark_point(filled=True, size=50) + .encode( + x=x_encoding, + y=alt.Y( + f"{objective_name}:Q", + scale=alt.Scale(zero=False, padding=1), + axis=alt.Axis(title=objective_name), + ), + opacity=opacity, + tooltip=detail_tooltip, + **jobs_props, + ) + ) + + if discrete: + # Individually coloring the values only if we don't already + # use the colors to show the different tuning jobs + logger.info("%s, %s", parameter_type, tuning_parameter) + if not multiple_tuning_jobs: + charts[-1] = charts[-1].encode(color=f"{tuning_parameter}:N") + charts[-1] = ( + ( + charts[-1] + | alt.Chart(trials_df) + .transform_filter(brush) + .transform_density( + objective_name, + bandwidth=0.01, + groupby=[tuning_parameter], + # https://github.com/vega/altair/issues/3203#issuecomment-2141558911 + # Specifying extent no longer necessary (>5.1.2). Leaving the work around in it for now. 
+ extent=[ + trials_df[objective_name].min(), + trials_df[objective_name].max(), + ], + ) + .mark_area(opacity=0.5) + .encode( + x=alt.X( + "value:Q", + title=objective_name, + scale=objective_scale, + ), + y="density:Q", + color=alt.Color( + f"{tuning_parameter}:N", + ), + tooltip=tuning_parameter, + ) + ).properties(title=tuning_parameter) + # .resolve_scale("independent") + # .resolve_legend(color="independent") + ) + + if advanced and parameter_type == "Q": + # Adding tick marks to the detail charts with quantitative hyperparameters + x_enc = x_encoding.copy() + charts[-1].encoding.x.title = None + charts[-1].encoding.x.axis = alt.Axis(labels=False) + + charts[-1] = charts[-1] & alt.Chart(trials_df).mark_tick(opacity=0.5).encode( + x=x_enc, + opacity=alt.condition(brush, alt.value(0.5), alt.value(0.1)), + ) + + return _columnize(charts) + + detail_charts = render_detail_charts() + + # First Row + # Progress Over Time Chart + + def render_progress_chart(): + # Sorting trials by training start time, so that we can track the \ + # progress of the best objective so far over time + trials_df_by_tst = trials_df.sort_values(["TuningJobName", "TrainingStartTime"]) + trials_df_by_tst["cum_objective"] = trials_df_by_tst.groupby(["TuningJobName"]).transform( + lambda x: x.cummin() if minimize_objective else x.cummax() + )[objective_name] + + progress_chart = ( + alt.Chart(trials_df_by_tst) + .add_params(brush) + .add_params(job_highlight_selection) + .mark_point(filled=True, size=50) + .encode( + x=alt.X("TrainingStartTime:T", scale=alt.Scale(nice=True)), + y=alt.Y( + f"{objective_name}:Q", + scale=alt.Scale(zero=False, padding=1), + axis=alt.Axis(title=objective_name), + ), + opacity=opacity, + tooltip=detail_tooltip, + **jobs_props, + ) + ) + + cum_obj_chart = ( + alt.Chart(trials_df_by_tst) + .mark_line( + interpolate="step-after", + opacity=1.0, + strokeDash=[3, 3], + strokeWidth=2.0, + ) + .encode( + x=alt.X("TrainingStartTime:T", scale=alt.Scale(nice=True)), + y=alt.Y("cum_objective:Q", scale=alt.Scale(zero=False, padding=1)), + stroke=alt.Stroke("TuningJobName:N", legend=None), + ) + ) + + if advanced: + return cum_obj_chart + progress_chart + return progress_chart + + progress_chart = render_progress_chart() + + # First Row + # KDE Training Objective + result_hist_chart = ( + alt.Chart(trials_df) + .transform_filter(brush) + .transform_density(objective_name, bandwidth=0.01) + .mark_area() + .encode( + x=alt.X("value:Q", scale=objective_scale, title=objective_name), + y="density:Q", + ) + ) + # Training Jobs + training_jobs_chart = ( + alt.Chart(trials_df.sort_values(objective_name), title="Training Jobs") + .mark_bar() + .add_params(brush) + .add_params(job_highlight_selection) + .encode( + y=alt.Y(f"{objective_name}:Q"), + x=alt.X("TrainingJobName:N", sort=None), + color=alt.Color("TrainingJobName:N"), + opacity=opacity, + **jobs_props, + ) + ) + + # Job Level Stats + + training_job_name_encodings = { + "color": alt.condition( + brush, + alt.Color("TrainingJobName:N", legend=None), + alt.value("grey"), + ), + "opacity": alt.condition(brush, alt.value(1.0), alt.value(0.3)), + "strokeWidth": alt.condition(brush, alt.value(2.5), alt.value(0.8)), + } + + duration_format = "%M:%S" + metrics_tooltip = [ + "TrainingJobName:N", + "value:Q", + "label:N", + alt.Tooltip("ts:T", format="%e:%H:%M"), + alt.Tooltip("rel_ts:T", format="%e:%H:%M"), + ] + + job_level_rows = alt.HConcatChart() + + # Use CW metrics + if not full_df.empty: + # Objective Progression + + objective_progression_chart = None + # 
Suppress diagram if we only have one, final, value + if ( + full_df.loc[full_df.label == objective_name] + .groupby(["TuningJobName", "TrainingJobName"])[objective_name] + .count() + .max() + > 1 + ): + objective_progression_chart = ( + alt.Chart(full_df, title=f"Progression {objective_name}", width=400) + .transform_filter(alt.FieldEqualPredicate(field="label", equal=objective_name)) + .mark_line(point=True) + .encode( + x=alt.X("rel_ts:T", axis=alt.Axis(format=duration_format)), + y=alt.Y("value:Q", scale=alt.Scale(zero=False)), + **training_job_name_encodings, + tooltip=metrics_tooltip, + ) + .interactive() + ) + + if multiple_job_status: + objective_progression_chart = objective_progression_chart.encode( + strokeDash=alt.StrokeDash("TrainingJobStatus:N", legend=None) + ) + + # Secondary chart showing the same contents, but by absolute time. + objective_progression_absolute_chart = objective_progression_chart.encode( + x=alt.X("ts:T", scale=alt.Scale(nice=True)) + ) + + objective_progression_chart = ( + objective_progression_chart | objective_progression_absolute_chart + ) + + ### + + job_metrics_charts = [] + for metric in job_metrics: + metric_chart = ( + alt.Chart(full_df, title=metric, width=400) + .transform_filter(alt.FieldEqualPredicate(field="label", equal=metric)) + .encode( + y=alt.Y("value:Q", scale=alt.Scale(zero=False)), + **training_job_name_encodings, + tooltip=metrics_tooltip, + ) + .interactive() + ) + + if ( + full_df.loc[full_df.label == metric] + .groupby(["TuningJobName", "TrainingJobName"]) + .count() + .value.max() + == 1 + ): + # single value, render as a bar over the training jobs on the x-axis + metric_chart = metric_chart.encode( + x=alt.X("TrainingJobName:N", sort=None) + ).mark_bar(interpolate="linear", point=True) + else: + # multiple values, render the values over time on the x-axis + metric_chart = metric_chart.encode( + x=alt.X("rel_ts:T", axis=alt.Axis(format=duration_format)) + ).mark_line(interpolate="linear", point=True) + + job_metrics_charts.append(metric_chart) + + job_metrics_chart = _columnize(job_metrics_charts, 3) + + # Job instance + # 'MemoryUtilization', 'CPUUtilization' + instance_metrics_chart = ( + alt.Chart(full_df, title="CPU and Memory") + .transform_filter( + alt.FieldOneOfPredicate( + field="label", + oneOf=[ + "MemoryUtilization", + "CPUUtilization", + ], + ) + ) + .mark_line() + .encode( + x=alt.X("rel_ts:T", axis=alt.Axis(format=duration_format)), + y="value:Q", + **training_job_name_encodings, + strokeDash=alt.StrokeDash("label:N", legend=alt.Legend(orient="bottom")), + tooltip=metrics_tooltip, + ) + .interactive() + ) + + if "GPUUtilization" in full_df.label.values: + instance_metrics_chart = ( + instance_metrics_chart + | alt.Chart(full_df, title="GPU and GPU Memory") + .transform_filter( + alt.FieldOneOfPredicate( + field="label", + oneOf=[ + "GPUMemoryUtilization", + "GPUUtilization", + ], + ) + ) + .mark_line() + .encode( + x=alt.X("rel_ts:T", axis=alt.Axis(format=duration_format)), + y=alt.Y("value:Q"), + **training_job_name_encodings, + strokeDash=alt.StrokeDash("label:N", legend=alt.Legend(orient="bottom")), + tooltip=metrics_tooltip, + ) + .interactive() + ) + + job_level_rows = job_metrics_chart & instance_metrics_chart + if objective_progression_chart: + job_level_rows = objective_progression_chart & job_level_rows + job_level_rows = job_level_rows.resolve_scale(strokeDash="independent").properties( + title="Job / Instance Level Metrics" + ) + + overview_row = (progress_chart | result_hist_chart).properties( + 
title="Hyper Parameter Tuning Job" + ) + detail_rows = detail_charts.properties(title="Hyper Parameter Details") + if job_level_rows: + job_level_rows = training_jobs_chart & job_level_rows + + return overview_row & detail_rows & job_level_rows + + +def _clean_parameter_name(s): + """ Helper method to ensure proper parameter name characters for altair 5+ """ + return s.replace(":", "_").replace(".", "_") + + +def _prepare_training_job_metrics(jobs): + """Fetches and combines CloudWatch metrics for multiple training jobs. + + Args: + jobs (list): List of (job_name, start_time, end_time) tuples. + + Returns: + pandas.DataFrame: Combined metrics DataFrame with 'TrainingJobName' column. + """ + df = pd.DataFrame() + for job_name, start_time, end_time in jobs: + job_df = get_cw_job_metrics( + job_name, + start_time=pd.Timestamp(start_time) - pd.DateOffset(hours=8), + end_time=pd.Timestamp(end_time) + pd.DateOffset(hours=8), + ) + if job_df is None: + logger.info("No CloudWatch metrics for %s. Skipping.", job_name) + continue + + job_df["TrainingJobName"] = job_name + df = pd.concat([df, job_df]) + return df + + +def _prepare_consolidated_df(trials_df): + """Merges training job metrics with trials data into a consolidated DataFrame.""" + if trials_df.empty: + return pd.DataFrame() + + logger.debug("Cache Hit/Miss: ", end="") + jobs_df = _prepare_training_job_metrics( + zip( + trials_df.TrainingJobName.values, + trials_df.TrainingStartTime.values, + trials_df.TrainingEndTime.values, + ) + ) + logger.info("") + + if jobs_df.empty: + return pd.DataFrame() + + merged_df = pd.merge(jobs_df, trials_df, on="TrainingJobName") + return merged_df + + +def _get_df(tuning_job_name, filter_out_stopped=False): + """Retrieves hyperparameter tuning job results and returns preprocessed DataFrame with + tuning metrics and parameters.""" + + tuner = sagemaker.HyperparameterTuningJobAnalytics(tuning_job_name) + + df = tuner.dataframe() + if df.empty: # HPO job just started; no results yet + return df + + df["TuningJobName"] = tuning_job_name + + # Filter out jobs without FinalObjectiveValue + df = df[df["FinalObjectiveValue"] > -float("inf")] + + # Jobs early stopped by AMT are reported with their last + # objective value, before they are stopped. + # However this value may not be a good representation + # of the eventual objective value we would have seen + # if run without stopping. Therefore it may be confusing + # to include those runs. + # For now, if included, we use a different mark to + # discriminate visually between a stopped and finished job + + if filter_out_stopped: + df = df[df["TrainingJobStatus"] != "Stopped"] + + # Preprocessing values for [32], [64] etc. + for tuning_range in tuner.tuning_ranges.values(): + parameter_name = tuning_range["Name"] + if df.dtypes[parameter_name] == "O": + try: + # Remove decorations, like [] + df[parameter_name] = df[parameter_name].apply( + lambda v: v.replace("[", "").replace("]", "").replace('"', "") + ) + + # Is it an int? 3 would work, 3.4 would fail. + try: + df[parameter_name] = df[parameter_name].astype(int) + except ValueError: + # A float then? + df[parameter_name] = df[parameter_name].astype(float) + + except Exception: + # Trouble, as this was not a number just pretending to be a string, but an actual string with + # characters. 
Leaving the value untouched + # Ex: Caught exception could not convert string to float: 'sqrt' + pass + + return df + + +def _get_tuning_job_names_with_parents(tuning_job_names): + """Resolve dependent jobs, one level only""" + + all_tuning_job_names = [] + for tuning_job_name in tuning_job_names: + tuning_job_result = sm.describe_hyper_parameter_tuning_job( + HyperParameterTuningJobName=tuning_job_name + ) + + # find parent jobs and retrieve all tuner dataframes + parent_jobs = [] + if "WarmStartConfig" in tuning_job_result: + parent_jobs = [ + cfg["HyperParameterTuningJobName"] + for cfg in tuning_job_result["WarmStartConfig"]["ParentHyperParameterTuningJobs"] + ] + if parent_jobs: + logger.info("Tuning job %s's parents: %s", tuning_job_name, ", ".join(parent_jobs)) + all_tuning_job_names.extend([tuning_job_name, *parent_jobs]) + + # return de-duplicated tuning job names + return list(set(all_tuning_job_names)) + + +def get_job_analytics_data(tuning_job_names): + """Retrieves and processes analytics data from hyperparameter tuning jobs. + + Args: + tuning_job_names (str or list): Single tuning job name or list of names/tuner objects. + + Returns: + tuple: (DataFrame with training results, tuned parameters list, objective name, is_minimize flag). + + Raises: + ValueError: If tuning jobs have different objectives or optimization directions. + """ + if not isinstance(tuning_job_names, list): + tuning_job_names = [tuning_job_names] + + # Ensure to create a list of tuning job names (strings) + tuning_job_names = [ + tuning_job.describe()["HyperParameterTuningJobName"] + if isinstance(tuning_job, sagemaker.tuner.HyperparameterTuner) + else tuning_job + for tuning_job in tuning_job_names + ] + + # Maintain combined tuner dataframe from all tuning jobs + df = pd.DataFrame() + + # maintain objective, direction of optimization and tuned parameters + objective_name = None + is_minimize = None + tuned_parameters = None + + all_tuning_job_names = _get_tuning_job_names_with_parents(tuning_job_names) + + for tuning_job_name in all_tuning_job_names: + tuning_job_result = sm.describe_hyper_parameter_tuning_job( + HyperParameterTuningJobName=tuning_job_name + ) + status = tuning_job_result["HyperParameterTuningJobStatus"] + logger.info("Tuning job %-25s status: %s", tuning_job_name, status) + + df = pd.concat([df, _get_df(tuning_job_name)]) + + # maintain objective and assure that all tuning jobs use the same + job_is_minimize = ( + tuning_job_result["HyperParameterTuningJobConfig"]["HyperParameterTuningJobObjective"][ + "Type" + ] + != "Maximize" + ) + job_objective_name = tuning_job_result["HyperParameterTuningJobConfig"][ + "HyperParameterTuningJobObjective" + ]["MetricName"] + job_tuned_parameters = [ + v["Name"] + for v in sagemaker.HyperparameterTuningJobAnalytics( + tuning_job_name + ).tuning_ranges.values() + ] + + if not objective_name: + objective_name = job_objective_name + is_minimize = job_is_minimize + tuned_parameters = job_tuned_parameters + else: + if ( + objective_name != job_objective_name + or is_minimize != job_is_minimize + or set(tuned_parameters) != set(job_tuned_parameters) + ): + raise ValueError( + "All tuning jobs must use the same objective and optimization direction." + ) + + if not df.empty: + # Cleanup wrongly encoded floats, e.g. containing quotes. 
+ for i, dtype in enumerate(df.dtypes): + column_name = str(df.columns[i]) + if column_name in [ + "TrainingJobName", + "TrainingJobStatus", + "TuningJobName", + ]: + continue + if dtype == "object": + val = df[column_name].iloc[0] + if isinstance(val, str) and val.startswith('"'): + try: + df[column_name] = df[column_name].apply(lambda x: int(x.replace('"', ""))) + except: # noqa: E722 nosec b110 if we fail, we just continue with what we had + pass # Value is not an int, but a string + + df = df.sort_values("FinalObjectiveValue", ascending=is_minimize) + df[objective_name] = df.pop("FinalObjectiveValue") + + # Fix potential issue with dates represented as objects, instead of a timestamp + # This can in other cases lead to: + # https://www.markhneedham.com/blog/2020/01/10/altair-typeerror-object-type-date-not-json-serializable/ + # Have only observed this for TrainingEndTime, but will be on the lookout dfor TrainingStartTime as well now + df["TrainingEndTime"] = pd.to_datetime(df["TrainingEndTime"]) + df["TrainingStartTime"] = pd.to_datetime(df["TrainingStartTime"]) + + logger.info("") + logger.info("Number of training jobs with valid objective: %d", len(df)) + logger.info("Lowest: %s Highest %s", min(df[objective_name]), max(df[objective_name])) + + tuned_parameters = [_clean_parameter_name(tp) for tp in tuned_parameters] + + return df, tuned_parameters, objective_name, is_minimize diff --git a/src/sagemaker/tuner.py b/src/sagemaker/tuner.py index fa8f9b8555..35d468feeb 100644 --- a/src/sagemaker/tuner.py +++ b/src/sagemaker/tuner.py @@ -2117,6 +2117,69 @@ def _add_estimator( delete_endpoint = removed_function("delete_endpoint") + @staticmethod + def visualize_jobs( + tuning_jobs: Union[ + str, 'sagemaker.tuner.HyperparameterTuner', + List[Union[str, 'sagemaker.tuner.HyperparameterTuner']] + ], + return_dfs: bool = False, + job_metrics: Optional[List[str]] = None, + trials_only: bool = False, + advanced: bool = False + ): + """Create an interactive visualization based on altair charts using the sagemaker.amtviz + package. + Args: + tuning_jobs (str or sagemaker.tuner.HyperparameterTuner or list[str, sagemaker.tuner.HyperparameterTuner]): + One or more tuning jobs to create + visualization for. + return_dfs: (bool): Option to return trials and full dataframe. + job_metrics: (list[str]): Metrics to be used in charts. + trials_only: (bool): Whether to show trials only or full dataframe. + advanced: (bool): Show a cumulative step line in the progress over time chart. + Returns: + A collection of charts (altair.VConcatChart); or charts, trials_df (pandas.DataFrame), + full_df (pandas.DataFrame) if ``return_dfs=True``. + """ + try: + # Check if altair is installed + importlib.import_module('altair') + + except ImportError: + print("Altair is not installed. To use the visualization feature, please install Altair:") + print(" pip install altair") + print("After installing Altair, you can use the methods visualize_jobs or visualize_job.") + return None + + # If altair is installed, proceed with visualization + from sagemaker.amtviz import visualize_tuning_job + + return visualize_tuning_job( + tuning_jobs, + return_dfs=return_dfs, + job_metrics=job_metrics, + trials_only=trials_only, + advanced=advanced, + ) + + def visualize_job( + self, return_dfs: bool = False, + job_metrics: Optional[List[str]] = None, + trials_only: bool = False, + advanced: bool = False + ): + """Convenience method on instance level for visualize_jobs(). + See static method visualize_jobs(). 
+ """ + return HyperparameterTuner.visualize_jobs( + self, + return_dfs=return_dfs, + job_metrics=job_metrics, + trials_only=trials_only, + advanced=advanced, + ) + class _TuningJob(_Job): """Placeholder docstring""" diff --git a/tests/unit/test_tuner_visualize.py b/tests/unit/test_tuner_visualize.py new file mode 100644 index 0000000000..8f17f4d2db --- /dev/null +++ b/tests/unit/test_tuner_visualize.py @@ -0,0 +1,314 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +"""Tests related to amtviz.visualization""" +from __future__ import absolute_import + +import pandas as pd +import pytest +from mock import Mock, patch, MagicMock +import sagemaker +from sagemaker.estimator import Estimator +from sagemaker.session_settings import SessionSettings +from sagemaker.tuner import ( + HyperparameterTuner +) +from tests.unit.tuner_test_utils import ( + OBJECTIVE_METRIC_NAME, + HYPERPARAMETER_RANGES, + METRIC_DEFINITIONS +) +# Visualization specific imports +from sagemaker.amtviz.visualization import visualize_tuning_job, get_job_analytics_data +from tests.unit.tuner_visualize_test_utils import ( + TUNING_JOB_NAMES, + TUNED_PARAMETERS, + OBJECTIVE_NAME, + TRIALS_DF_DATA, + FULL_DF_DATA, + TUNING_JOB_NAME_1, + TUNING_JOB_NAME_2, + TUNING_JOB_RESULT, + TRIALS_DF_COLUMNS, + FULL_DF_COLUMNS, + TRIALS_DF_TRAINING_JOB_NAMES, + TRIALS_DF_TRAINING_JOB_STATUSES, + TRIALS_DF_VALID_F1_VALUES, + FILTERED_TUNING_JOB_DF_DATA, + TUNING_RANGES +) +import altair as alt + + +def create_sagemaker_session(): + boto_mock = Mock(name="boto_session") + sms = Mock( + name="sagemaker_session", + boto_session=boto_mock, + config=None, + local_mode=False, + settings=SessionSettings() + ) + sms.sagemaker_config = {} + return sms + + +@pytest.fixture() +def sagemaker_session(): + return create_sagemaker_session() + + +@pytest.fixture() +def estimator(sagemaker_session): + return Estimator( + "image", + "role", + 1, + "ml.c4.xlarge", + output_path="s3://bucket/prefix", + sagemaker_session=sagemaker_session, + ) + + +@pytest.fixture() +def tuner(estimator): + return HyperparameterTuner( + estimator, OBJECTIVE_METRIC_NAME, HYPERPARAMETER_RANGES, METRIC_DEFINITIONS + ) + + +@pytest.fixture() +def tuner2(estimator): + return HyperparameterTuner( + estimator, OBJECTIVE_METRIC_NAME, HYPERPARAMETER_RANGES, METRIC_DEFINITIONS + ) + + +@pytest.fixture +def mock_visualize_tuning_job(): + with patch("sagemaker.amtviz.visualize_tuning_job") as mock_visualize: + mock_visualize.return_value = "mock_chart" + yield mock_visualize + + +@pytest.fixture +def mock_get_job_analytics_data(): + with patch("sagemaker.amtviz.visualization.get_job_analytics_data") as mock: + mock.return_value = ( + pd.DataFrame(TRIALS_DF_DATA), + TUNED_PARAMETERS, + OBJECTIVE_NAME, + True + ) + yield mock + + +@pytest.fixture +def mock_prepare_consolidated_df(): + with patch("sagemaker.amtviz.visualization._prepare_consolidated_df") as mock: + mock.return_value = pd.DataFrame(FULL_DF_DATA) + yield mock + + +# Test graceful handling if the required altair library 
is not installed +def test_visualize_jobs_altair_not_installed(capsys): + # Mock importlib.import_module to raise ImportError for 'altair' + with patch("importlib.import_module") as mock_import: + mock_import.side_effect = ImportError("No module named 'altair'") + result = HyperparameterTuner.visualize_jobs(TUNING_JOB_NAMES) + assert result is None + captured = capsys.readouterr() + assert "Altair is not installed." in captured.out + assert "pip install altair" in captured.out + + +# Test basic method call if altair is installed +def test_visualize_jobs_altair_installed(mock_visualize_tuning_job): + # Mock successful import of altair + with patch("importlib.import_module"): + result = HyperparameterTuner.visualize_jobs(TUNING_JOB_NAMES) + assert result == "mock_chart" + + +# Test for static method visualize_jobs() +def test_visualize_jobs(mock_visualize_tuning_job): + result = HyperparameterTuner.visualize_jobs(TUNING_JOB_NAMES) + assert result == "mock_chart" + mock_visualize_tuning_job.assert_called_once_with( + TUNING_JOB_NAMES, + return_dfs=False, + job_metrics=None, + trials_only=False, + advanced=False + ) + # Vary the parameters and check if they have been passed correctly + result = HyperparameterTuner.visualize_jobs( + [TUNING_JOB_NAME_1], return_dfs=True, job_metrics="job_metrics", trials_only=True, advanced=True) + mock_visualize_tuning_job.assert_called_with( + [TUNING_JOB_NAME_1], + return_dfs=True, + job_metrics="job_metrics", + trials_only=True, + advanced=True + ) + + +# Test the instance method visualize_job() on a stubbed tuner object +def test_visualize_job(tuner, mock_visualize_tuning_job): + # With default parameters + result = tuner.visualize_job() + assert result == "mock_chart" + mock_visualize_tuning_job.assert_called_once_with( + tuner, + return_dfs=False, + job_metrics=None, + trials_only=False, + advanced=False + ) + # With varying parameters + result = tuner.visualize_job(return_dfs=True, job_metrics="job_metrics", trials_only=True, advanced=True) + assert result == "mock_chart" + mock_visualize_tuning_job.assert_called_with( + tuner, + return_dfs=True, + job_metrics="job_metrics", + trials_only=True, + advanced=True + ) + + +# Test the static method visualize_jobs() on multiple stubbed tuner objects +def test_visualize_multiple_jobs(tuner, tuner2, mock_visualize_tuning_job): + result = HyperparameterTuner.visualize_jobs([tuner, tuner2]) + assert result == "mock_chart" + mock_visualize_tuning_job.assert_called_once_with( + [tuner, tuner2], + return_dfs=False, + job_metrics=None, + trials_only=False, + advanced=False + ) + # Vary the parameters and check if they have been passed correctly + result = HyperparameterTuner.visualize_jobs( + [[tuner, tuner2]], return_dfs=True, job_metrics="job_metrics", trials_only=True, advanced=True) + mock_visualize_tuning_job.assert_called_with( + [[tuner, tuner2]], + return_dfs=True, + job_metrics="job_metrics", + trials_only=True, + advanced=True + ) + + +# Test direct method call for basic chart return type and default render settings +def test_visualize_tuning_job_analytics_data_results_in_altair_chart(mock_get_job_analytics_data): + result = visualize_tuning_job("mock_job") + assert alt.renderers.active == "default" + assert isinstance(result, alt.VConcatChart) + + +# Test the size and structure of the returned dataframes (trials_df and full_df) +def test_visualize_tuning_job_return_dfs(mock_get_job_analytics_data, mock_prepare_consolidated_df): + charts, trials_df, full_df = visualize_tuning_job("mock_job", return_dfs=True) 
+ # Basic assertion for the charts + assert isinstance(charts, alt.VConcatChart) + + # Assertions for trials_df + assert isinstance(trials_df, pd.DataFrame) + assert trials_df.shape == (2, len(TRIALS_DF_COLUMNS)) + assert trials_df.columns.tolist() == TRIALS_DF_COLUMNS + assert trials_df['TrainingJobName'].tolist() == TRIALS_DF_TRAINING_JOB_NAMES + assert trials_df['TrainingJobStatus'].tolist() == TRIALS_DF_TRAINING_JOB_STATUSES + assert trials_df['TuningJobName'].tolist() == TUNING_JOB_NAMES + assert trials_df['valid-f1'].tolist() == TRIALS_DF_VALID_F1_VALUES + + # Assertions for full_df + assert isinstance(full_df, pd.DataFrame) + assert full_df.shape == (2, 16) + assert full_df.columns.tolist() == FULL_DF_COLUMNS + + +# Test the handling of an an empty trials dataframe +@patch("sagemaker.amtviz.visualization.get_job_analytics_data") +def test_visualize_tuning_job_empty_trials(mock_get_job_analytics_data): + mock_get_job_analytics_data.return_value = ( + pd.DataFrame(), # empty dataframe + TUNED_PARAMETERS, + OBJECTIVE_NAME, + True + ) + charts = visualize_tuning_job("empty_job") + assert charts.empty + + +# Test handling of return_dfs and trials_only parameter +def test_visualize_tuning_job_trials_only(mock_get_job_analytics_data): + # If return_dfs is set to False, then only charts should be returned + result = visualize_tuning_job("mock_job", return_dfs=False, trials_only=True) + assert isinstance(result, alt.VConcatChart) + # Trials_only controls the content of the two returned dataframes (trials_df, full_df) + result, df1, df2 = visualize_tuning_job("mock_job", return_dfs=True, trials_only=True) + assert isinstance(df1, pd.DataFrame) + assert df1.shape == (2, len(TRIALS_DF_COLUMNS)) + assert isinstance(df2, pd.DataFrame) + assert df2.empty + # The combination of return_dfs and trials_only=False is covered in 'test_visualize_tuning_job_return_dfs' + + +# Check if all parameters are correctly passed to the (mocked) create_charts method +@patch("sagemaker.amtviz.visualization.create_charts") +def test_visualize_tuning_job_with_full_df( + mock_create_charts, + mock_get_job_analytics_data, + mock_prepare_consolidated_df +): + mock_create_charts.return_value = alt.Chart() + visualize_tuning_job("dummy_job") + + # Check the create_charts call arguments + call_args = mock_create_charts.call_args[0] + call_kwargs = mock_create_charts.call_args[1] + assert isinstance(call_args[0], pd.DataFrame) # trials_df + assert isinstance(call_args[1], list) # tuned_parameters + assert isinstance(call_args[2], pd.DataFrame) # full_df + assert isinstance(call_args[3], str) # objective_name + assert call_kwargs.get("minimize_objective") + + # Check the details of the passed arguments + trials_df = call_args[0] + assert trials_df.columns.tolist() == TRIALS_DF_COLUMNS + tuned_parameters = call_args[1] + assert tuned_parameters == TUNED_PARAMETERS + objective_name = call_args[3] + assert objective_name == OBJECTIVE_NAME + full_df = call_args[2] + assert full_df.columns.tolist() == FULL_DF_COLUMNS + + +# Test the dataframe produced by get_job_analytics_data() +@patch("sagemaker.HyperparameterTuningJobAnalytics") +def test_get_job_analytics_data(mock_hyperparameter_tuning_job_analytics): + # Mock sagemaker's describe_hyper_parameter_tuning_job and some internal methods + sagemaker.amtviz.visualization.sm.describe_hyper_parameter_tuning_job = Mock(return_value=TUNING_JOB_RESULT) + sagemaker.amtviz.visualization._get_tuning_job_names_with_parents = Mock( + return_value=[TUNING_JOB_NAME_1, TUNING_JOB_NAME_2]) + 
sagemaker.amtviz.visualization._get_df = Mock(return_value=pd.DataFrame(FILTERED_TUNING_JOB_DF_DATA)) + mock_tuning_job_instance = MagicMock() + mock_hyperparameter_tuning_job_analytics.return_value = mock_tuning_job_instance + mock_tuning_job_instance.tuning_ranges.values.return_value = TUNING_RANGES + + df, tuned_parameters, objective_name, is_minimize = get_job_analytics_data([TUNING_JOB_NAME_1]) + assert df.shape == (4, 12) + assert df.columns.tolist() == TRIALS_DF_COLUMNS + assert tuned_parameters == TUNED_PARAMETERS + assert objective_name == OBJECTIVE_NAME + assert is_minimize is False diff --git a/tests/unit/tuner_visualize_test_utils.py b/tests/unit/tuner_visualize_test_utils.py new file mode 100644 index 0000000000..17a993717c --- /dev/null +++ b/tests/unit/tuner_visualize_test_utils.py @@ -0,0 +1,136 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +from __future__ import absolute_import + +TRIALS_DF_COLUMNS = [ + 'criterion', 'max-depth', 'min-samples-leaf', 'min-weight-fraction-leaf', 'n-estimators', 'TrainingJobName', + 'TrainingJobStatus', + 'TrainingStartTime', 'TrainingEndTime', 'TrainingElapsedTimeSeconds', 'TuningJobName', 'valid-f1' +] + +FULL_DF_COLUMNS = [ + 'value', 'ts', 'label', 'rel_ts', 'TrainingJobName', 'criterion', 'max-depth', 'min-samples-leaf', + 'min-weight-fraction-leaf', 'n-estimators', 'TrainingJobStatus', 'TrainingStartTime', 'TrainingEndTime', + 'TrainingElapsedTimeSeconds', 'TuningJobName', 'valid-f1' +] + + +TRIALS_DF_TRAINING_JOB_NAMES = [ + 'random-240712-1545-019-4ac17a84', 'random-240712-1545-021-fcd64dc1' +] + +TRIALS_DF_TRAINING_JOB_STATUSES = ['Completed', 'Completed'] + +TUNING_JOB_NAME_1 = 'random-240712-1500' +TUNING_JOB_NAME_2 = 'bayesian-240712-1600' +TUNING_JOB_NAMES = [TUNING_JOB_NAME_1, TUNING_JOB_NAME_2] +TRIALS_DF_VALID_F1_VALUES = [0.950, 0.896] + +FULL_DF_COLUMNS = ['value', 'ts', 'label', 'rel_ts', 'TrainingJobName', 'criterion', 'max-depth', 'min-samples-leaf', + 'min-weight-fraction-leaf', 'n-estimators', 'TrainingJobStatus', 'TrainingStartTime', + 'TrainingEndTime', 'TrainingElapsedTimeSeconds', 'TuningJobName', 'valid-f1'] + +TUNED_PARAMETERS = ['n-estimators', 'max-depth', 'min-samples-leaf', 'min-weight-fraction-leaf', 'criterion'] +OBJECTIVE_NAME = 'valid-f1' + +TRIALS_DF_DATA = { + 'criterion': ['gini', 'log_loss'], + 'max-depth': [18.0, 8.0], + 'min-samples-leaf': [3.0, 10.0], + 'min-weight-fraction-leaf': [0.011596, 0.062067], + 'n-estimators': [110.0, 18.0], + 'TrainingJobName': ['random-240712-1545-019-4ac17a84', 'random-240712-1545-021-fcd64dc1'], + 'TrainingJobStatus': ['Completed', 'Completed'], + 'TrainingStartTime': ['2024-07-12 17:55:59+02:00', '2024-07-12 17:56:50+02:00'], + 'TrainingEndTime': ['2024-07-12 17:56:43+02:00', '2024-07-12 17:57:29+02:00'], + 'TrainingElapsedTimeSeconds': [44.0, 39.0], + 'TuningJobName': TUNING_JOB_NAMES, + 'valid-f1': [0.950, 0.896] +} + +FULL_DF_DATA = { + 'value': [0.951000, 0.950000], + 'ts': ['2024-07-12 15:56:00', '2024-07-12 15:56:00'], + 'label': 
['valid-precision', 'valid-recall'], + 'rel_ts': ['1970-01-01 01:00:00', '1970-01-01 01:00:00'], + 'TrainingJobName': ['random-240712-1545-019-4ac17a84', 'random-240712-1545-019-4ac17a84'], + 'criterion': ['gini', 'gini'], + 'max-depth': [18.0, 18.0], + 'min-samples-leaf': [3.0, 3.0], + 'min-weight-fraction-leaf': [0.011596, 0.011596], + 'n-estimators': [110.0, 110.0], + 'TrainingJobStatus': ['Completed', 'Completed'], + 'TrainingStartTime': ['2024-07-12 17:55:59+02:00', '2024-07-12 17:55:59+02:00'], + 'TrainingEndTime': ['2024-07-12 17:56:43+02:00', '2024-07-12 17:56:43+02:00'], + 'TrainingElapsedTimeSeconds': [44.0, 45.0], + 'TuningJobName': ['random-240712-1545', 'random-240712-1545'], + 'valid-f1': [0.9500, 0.9500] +} + +FILTERED_TUNING_JOB_DF_DATA = { + 'criterion': ['log_loss', 'gini'], + 'max-depth': [10.0, 16.0], + 'min-samples-leaf': [7.0, 2.0], + 'min-weight-fraction-leaf': [0.160910, 0.069803], + 'n-estimators': [67.0, 79.0], + 'TrainingJobName': ['random-240712-1545-050-c0b5c10a', 'random-240712-1545-049-2db2ec05'], + 'TrainingJobStatus': ['Completed', 'Completed'], + 'FinalObjectiveValue': [0.8190, 0.8910], + 'TrainingStartTime': ['2024-07-12 18:09:48+02:00', '2024-07-12 18:09:45+02:00'], + 'TrainingEndTime': ['2024-07-12 18:10:28+02:00', '2024-07-12 18:10:23+02:00'], + 'TrainingElapsedTimeSeconds': [40.0, 38.0], + 'TuningJobName': [TUNING_JOB_NAME_1, TUNING_JOB_NAME_2] +} + +TUNING_RANGES = [ + { + 'Name': 'n-estimators', + 'MinValue': '1', + 'MaxValue': '200', + 'ScalingType': 'Auto' + }, + { + 'Name': 'max-depth', + 'MinValue': '1', + 'MaxValue': '20', + 'ScalingType': 'Auto' + }, + { + 'Name': 'min-samples-leaf', + 'MinValue': '1', + 'MaxValue': '10', + 'ScalingType': 'Auto' + }, + { + 'Name': 'min-weight-fraction-leaf', + 'MinValue': '0.01', + 'MaxValue': '0.5', + 'ScalingType': 'Auto' + }, + { + 'Name': 'criterion', + 'Values': ['"gini"', '"entropy"', '"log_loss"'] + } +] + +TUNING_JOB_RESULT = { + 'HyperParameterTuningJobName': TUNING_JOB_NAME_1, + 'HyperParameterTuningJobConfig': { + 'Strategy': 'Random', + 'HyperParameterTuningJobObjective': { + 'Type': 'Maximize', + 'MetricName': 'valid-f1' + } + }, + 'HyperParameterTuningJobStatus': 'Completed', +} diff --git a/tox.ini b/tox.ini index b16c0d2f0b..21e7248da2 100644 --- a/tox.ini +++ b/tox.ini @@ -86,6 +86,7 @@ commands = pip install 'torch==2.0.1+cpu' -f 'https://download.pytorch.org/whl/torch_stable.html' pip install 'torchvision==0.15.2+cpu' -f 'https://download.pytorch.org/whl/torch_stable.html' pip install 'dill>=0.3.8' + pip install 'altair>=5.3' # needed for amtviz pytest {posargs} deps = .[test]
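Reviewer note: a minimal usage sketch of the API added in this diff, assuming Altair is installed and a completed tuning job exists in the current account/region; the job names, the attach() call, and the metric label passed to job_metrics are illustrative only.

from sagemaker.tuner import HyperparameterTuner
from sagemaker.amtviz import visualize_tuning_job

# Static entry point: accepts a tuning job name, a HyperparameterTuner object, or a list of either.
charts = HyperparameterTuner.visualize_jobs("my-tuning-job")

# Instance-level convenience wrapper that delegates to visualize_jobs().
tuner = HyperparameterTuner.attach("my-tuning-job")
charts = tuner.visualize_job(advanced=True)

# The underlying function can also hand back the DataFrames for custom analysis.
charts, trials_df, full_df = visualize_tuning_job(
    ["my-tuning-job", "my-warm-start-parent-job"],
    return_dfs=True,
    job_metrics=["train:loss"],  # must match CloudWatch metric labels emitted by the training jobs (illustrative)
    advanced=True,
)
print(trials_df[["TrainingJobName", "TrainingJobStatus"]].head())

As documented in visualize_tuning_job, return_dfs=True yields the (charts, trials_df, full_df) tuple; with trials_only=True the consolidated full_df comes back empty and no per-job CloudWatch metrics are fetched.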
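Also relevant for review: _collect_metrics() is wrapped by the disk_cache decorator in job_metrics.py, so CloudWatch results are cached as gzipped JSON Lines files under ~/.amtviz/cw_metrics_cache/, keyed by an MD5 hash of the call arguments. A small sketch, using only the path visible in that module, for clearing the cache when stale or partial metrics are suspected; the next visualization call then re-fetches from CloudWatch:

import shutil
from pathlib import Path

cache_dir = Path.home() / ".amtviz" / "cw_metrics_cache"
if cache_dir.exists():
    shutil.rmtree(cache_dir)  # safe to delete; files are re-created on the next metrics fetch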