laurenyu
diff --git a/‎.gitignore
+1 b/‎.gitignore
+1
diff --git a/‎CHANGELOG.rst
+1 b/‎CHANGELOG.rst
+1
diff --git a/‎src/sagemaker/__init__.py
+2 b/‎src/sagemaker/__init__.py
+2
diff --git a/‎src/sagemaker/analytics.py
+294 b/‎src/sagemaker/analytics.py
+294
diff --git a/‎src/sagemaker/estimator.py
+9 b/‎src/sagemaker/estimator.py
+9
diff --git a/‎src/sagemaker/tuner.py
+15-1 b/‎src/sagemaker/tuner.py
+15-1
@@ -24,3 +24,4 @@ doc/_templates
 venv/
 *~
 .pytest_cache/
+*.swp
@@ -6,6 +6,7 @@ CHANGELOG
 1.2.dev5
 ========
 
+* feature: Analytics functions for metrics in Training and HyperparameterTuning jobs
 * bug-fix: Change module names to string type in __all__
 * feature: Save training output files in local mode
 * bug-fix: tensorflow-serving-api: SageMaker does not conflict with tensorflow-serving-api module version
 
@@ -21,6 +21,7 @@
 from sagemaker.amazon.factorization_machines import FactorizationMachinesPredictor
 from sagemaker.amazon.ntm import NTM, NTMModel, NTMPredictor
 from sagemaker.amazon.randomcutforest import RandomCutForest, RandomCutForestModel, RandomCutForestPredictor
+from sagemaker.analytics import TrainingJobAnalytics, HyperparameterTuningJobAnalytics
 
 from sagemaker.local.local_session import LocalSession
 
@@ -39,4 +40,5 @@
            'FactorizationMachines', 'FactorizationMachinesModel', 'FactorizationMachinesPredictor',
            'RandomCutForest', 'RandomCutForestModel', 'RandomCutForestPredictor',
            'Model', 'NTM', 'NTMModel', 'NTMPredictor', 'RealTimePredictor', 'Session', 'LocalSession',
+           'TrainingJobAnalytics', 'HyperparameterTuningJobAnalytics',
            'container_def', 's3_input', 'production_variant', 'get_execution_role']
@@ -0,0 +1,294 @@
+# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import print_function, absolute_import
+
+from abc import ABCMeta, abstractmethod
+from collections import defaultdict
+import datetime
+import logging
+
+from six import with_metaclass
+
+from sagemaker.session import Session
+from sagemaker.utils import DeferredError
+
+try:
+    import pandas as pd
+except ImportError as e:
+    logging.warning("pandas failed to import. Analytics features will be impaired or broken.")
+    # Any subsequent attempt to use pandas will raise the ImportError
+    pd = DeferredError(e)
+
+
+class AnalyticsMetricsBase(with_metaclass(ABCMeta, object)):
+    """Shared base for the tuning-job and training-job analytics classes.
+
+    Handles the parts common to both: lazily building a pandas dataframe,
+    caching it on the instance, and exporting it to CSV. Subclasses provide the
+    data by implementing ``_fetch_dataframe`` and are expected to call
+    ``clear_cache()`` during initialization so that the cache attribute exists.
+    """
+
+    def clear_cache(self):
+        """Forget the cached dataframe.
+
+        The next call to ``dataframe()`` rebuilds it through ``_fetch_dataframe``.
+        """
+        self._dataframe = None
+
+    @abstractmethod
+    def _fetch_dataframe(self):
+        """Compute and return the dataframe. Every sub-class must implement this.
+        """
+
+    def dataframe(self, force_refresh=False):
+        """Return the pandas dataframe summarizing this object, building it on first use.
+
+        Args:
+            force_refresh (bool): Set to True to discard cached data and fetch the
+                latest data from the SageMaker API.
+
+        Returns:
+            The cached dataframe, or a freshly fetched one if nothing was cached.
+        """
+        if force_refresh:
+            self.clear_cache()
+        frame = self._dataframe
+        if frame is None:
+            frame = self._fetch_dataframe()
+            self._dataframe = frame
+        return frame
+
+    def export_csv(self, filename):
+        """Write the analytics dataframe to a CSV file.
+
+        Args:
+            filename (str): The name of the file to save to.
+        """
+        frame = self.dataframe()
+        frame.to_csv(filename)
+
+
+class HyperparameterTuningJobAnalytics(AnalyticsMetricsBase):
+    """Fetches results about a hyperparameter tuning job and makes them accessible for analytics.
+
+    Responses from DescribeHyperParameterTuningJob and ListTrainingJobsForHyperParameterTuningJob
+    are cached on the instance until ``clear_cache()`` is called or a method is invoked with
+    ``force_refresh=True``.
+    """
+
+    def __init__(self, hyperparameter_tuning_job_name, sagemaker_session=None):
+        """Initialize an ``HyperparameterTuningJobAnalytics`` instance.
+
+        Args:
+            hyperparameter_tuning_job_name (str): name of the HyperparameterTuningJob to
+                analyze.
+            sagemaker_session (sagemaker.session.Session): Session object which manages interactions with
+                Amazon SageMaker APIs and any other AWS services needed. If not specified, a new
+                ``Session()`` is created.
+        """
+        sagemaker_session = sagemaker_session or Session()
+        self._sage_client = sagemaker_session.sagemaker_client
+        self._tuning_job_name = hyperparameter_tuning_job_name
+        # Initializes every cache attribute (including the base class dataframe) to None.
+        self.clear_cache()
+
+    @property
+    def name(self):
+        """Name of the HyperparameterTuningJob being analyzed
+        """
+        return self._tuning_job_name
+
+    def __repr__(self):
+        return "<sagemaker.HyperparameterTuningJobAnalytics for %s>" % self.name
+
+    def clear_cache(self):
+        """Clears the object of all local caches of API methods: the dataframe,
+        the tuning job description and the training job summaries.
+        """
+        super(HyperparameterTuningJobAnalytics, self).clear_cache()
+        self._tuning_job_describe_result = None
+        self._training_job_summaries = None
+
+    def _fetch_dataframe(self):
+        """Returns a pandas dataframe with one row per training job of the tuning job.
+
+        Each row holds the tuned hyperparameters plus ``TrainingJobName``,
+        ``TrainingJobStatus``, ``FinalObjectiveValue``, ``TrainingStartTime``,
+        ``TrainingEndTime`` and, when both times are known,
+        ``TrainingElapsedTimeSeconds``.
+        """
+        def reshape(training_summary):
+            # Helper to flatten a single training job summary into a dataframe record
+            out = {}
+            for k, v in training_summary['TunedHyperParameters'].items():
+                # Something (bokeh?) gets confused with ints so convert to float
+                try:
+                    v = float(v)
+                except (TypeError, ValueError):
+                    # Non-numeric values are kept unchanged.
+                    pass
+                out[k] = v
+            out['TrainingJobName'] = training_summary['TrainingJobName']
+            out['TrainingJobStatus'] = training_summary['TrainingJobStatus']
+            objective = training_summary.get('FinalHyperParameterTuningJobObjectiveMetric', {})
+            out['FinalObjectiveValue'] = objective.get('Value')
+
+            # NOTE(review): the 'TrainingStartTime' column is filled from 'CreationTime';
+            # confirm whether the summary's own start-time field should be used instead.
+            start_time = training_summary['CreationTime']
+            # 'TrainingEndTime' may be absent from a summary (e.g. a job that has not
+            # finished), so look it up without raising KeyError.
+            end_time = training_summary.get('TrainingEndTime')
+            out['TrainingStartTime'] = start_time
+            out['TrainingEndTime'] = end_time
+            if start_time and end_time:
+                out['TrainingElapsedTimeSeconds'] = (end_time - start_time).total_seconds()
+            return out
+        # Run that helper over all the summaries.
+        return pd.DataFrame([reshape(tjs) for tjs in self.training_job_summaries()])
+
+    @property
+    def tuning_ranges(self):
+        """A dict describing the ranges of all tuned hyperparameters.
+        Dict's key is the name of the hyper param.  Dict's value is the range.
+        """
+        out = {}
+        parameter_ranges = self.description()['HyperParameterTuningJobConfig']['ParameterRanges']
+        # Only the range lists are needed; the keys they are grouped under are ignored.
+        for ranges in parameter_ranges.values():
+            for param in ranges:
+                out[param['Name']] = param
+        return out
+
+    def description(self, force_refresh=False):
+        """Response to DescribeHyperParameterTuningJob
+
+        Args:
+            force_refresh (bool): Set to True to fetch the latest data from SageMaker API.
+                Note that this clears every cache on the object, not only the description.
+        """
+        if force_refresh:
+            self.clear_cache()
+        if not self._tuning_job_describe_result:
+            self._tuning_job_describe_result = self._sage_client.describe_hyper_parameter_tuning_job(
+                HyperParameterTuningJobName=self.name
+            )
+        return self._tuning_job_describe_result
+
+    def training_job_summaries(self, force_refresh=False):
+        """A list of everything (paginated) from ListTrainingJobsForTuningJob
+
+        At most 100 pages of up to 100 summaries each are requested; a warning is
+        logged if more results remained after the last page.
+
+        Args:
+            force_refresh (bool): Set to True to fetch the latest data from SageMaker API.
+                Note that this clears every cache on the object, not only the summaries.
+        """
+        if force_refresh:
+            self.clear_cache()
+        if self._training_job_summaries is not None:
+            return self._training_job_summaries
+        output = []
+        next_args = {}
+        for count in range(100):
+            logging.debug("Calling list_training_jobs_for_hyper_parameter_tuning_job %d", count)
+            raw_result = self._sage_client.list_training_jobs_for_hyper_parameter_tuning_job(
+                HyperParameterTuningJobName=self.name, MaxResults=100, **next_args
+            )
+            new_output = raw_result['TrainingJobSummaries']
+            output.extend(new_output)
+            logging.debug("Got %d more TrainingJobs. Total so far: %d", len(new_output), len(output))
+            # Keep paging only while the service hands back a token and the page was non-empty.
+            if ('NextToken' in raw_result) and new_output:
+                next_args['NextToken'] = raw_result['NextToken']
+            else:
+                break
+        else:
+            # The loop ran out of pages without reaching the end of the listing.
+            logging.warning("Stopped listing training jobs for %s after %d results; more may exist.",
+                            self.name, len(output))
+        self._training_job_summaries = output
+        return output
+
+
+class TrainingJobAnalytics(AnalyticsMetricsBase):
+    """Fetches training curve data from CloudWatch Metrics for a specific training job.
+    """
+
+    CLOUDWATCH_NAMESPACE = '/aws/sagemaker/HyperParameterTuningJobs'
+
+    def __init__(self, training_job_name, metric_names=None, sagemaker_session=None):
+        """Initialize an ``TrainingJobAnalytics`` instance.
+
+        Args:
+            training_job_name (str): name of the TrainingJob to analyze.
+            metric_names (list): string names of all the metrics to collect for this training job.
+                If not specified, the names are taken from the metric definitions reported by
+                DescribeTrainingJob for this job.
+            sagemaker_session (sagemaker.session.Session): Session object which manages interactions with
+                Amazon SageMaker APIs and any other AWS services needed. If not specified, a new
+                ``Session()`` is created.
+        """
+        sagemaker_session = sagemaker_session or Session()
+        self._sage_client = sagemaker_session.sagemaker_client
+        self._cloudwatch = sagemaker_session.boto_session.client('cloudwatch')
+        self._training_job_name = training_job_name
+        # Only None triggers discovery; an explicitly empty list is respected as-is.
+        if metric_names is None:
+            metric_names = self._metric_names_for_training_job()
+        self._metric_names = metric_names
+        self.clear_cache()
+
+    @property
+    def name(self):
+        """Name of the TrainingJob being analyzed
+        """
+        return self._training_job_name
+
+    def __repr__(self):
+        return "<sagemaker.TrainingJobAnalytics for %s>" % self.name
+
+    def clear_cache(self):
+        """Clears the object of all local caches of API methods, so
+        that the next time any properties are accessed they will be refreshed from
+        the service. Also re-reads the job's time interval via DescribeTrainingJob.
+        """
+        super(TrainingJobAnalytics, self).clear_cache()
+        self._data = defaultdict(list)
+        self._time_interval = self._determine_timeinterval()
+
+    def _metric_names_for_training_job(self):
+        """Returns the names of the metrics defined for this training job.
+        """
+        description = self._sage_client.describe_training_job(TrainingJobName=self.name)
+        # NOTE(review): assumes the response carries AlgorithmSpecification.MetricDefinitions
+        # as a list of dicts with a 'Name' key -- confirm against the API reference.
+        definitions = description.get('AlgorithmSpecification', {}).get('MetricDefinitions', [])
+        return [definition['Name'] for definition in definitions]
+
+    def _determine_timeinterval(self):
+        """Returns a dict with two datetime objects, start_time and end_time
+        covering the interval of the training job. If the job description has no
+        end time, the current UTC time is used instead.
+        """
+        description = self._sage_client.describe_training_job(TrainingJobName=self.name)
+        start_time = description[u'TrainingStartTime']  # datetime object
+        end_time = description.get(u'TrainingEndTime', datetime.datetime.utcnow())
+        return {
+            'start_time': start_time,
+            'end_time': end_time,
+        }
+
+    def _fetch_dataframe(self):
+        """Fetches every configured metric and returns the accumulated records as a
+        dataframe with ``timestamp``, ``metric_name`` and ``value`` columns.
+        """
+        for metric_name in self._metric_names:
+            self._fetch_metric(metric_name)
+        return pd.DataFrame(self._data)
+
+    def _fetch_metric(self, metric_name):
+        """Fetches all the values of a named metric, and adds them to _data
+
+        Logs a warning and adds nothing when CloudWatch returns no datapoints.
+        """
+        request = {
+            'Namespace': self.CLOUDWATCH_NAMESPACE,
+            'MetricName': metric_name,
+            'Dimensions': [
+                {
+                    'Name': 'TrainingJobName',
+                    'Value': self.name
+                }
+            ],
+            'StartTime': self._time_interval['start_time'],
+            'EndTime': self._time_interval['end_time'],
+            'Period': 60,
+            'Statistics': ['Average'],
+        }
+        raw_cwm_data = self._cloudwatch.get_metric_statistics(**request)['Datapoints']
+        if not raw_cwm_data:
+            logging.warning("Warning: No metrics called %s found", metric_name)
+            return
+
+        # Process data: express each timestamp as seconds since the earliest
+        # datapoint, then sort by that elapsed time.
+        base_time = min(pt['Timestamp'] for pt in raw_cwm_data)
+        all_xy = [((pt['Timestamp'] - base_time).total_seconds(), pt['Average']) for pt in raw_cwm_data]
+        all_xy.sort(key=lambda xy: xy[0])
+
+        # Store everything in _data to make a dataframe from
+        for elapsed_seconds, value in all_xy:
+            self._add_single_metric(elapsed_seconds, metric_name, value)
+
+    def _add_single_metric(self, timestamp, metric_name, value):
+        """Stores a single metric in the _data dict which can be
+        converted to a dataframe.
+        """
+        # note that this method is built this way to make it possible to
+        # support live-refreshing charts in Bokeh at some point in the future.
+        self._data['timestamp'].append(timestamp)
+        self._data['metric_name'].append(metric_name)
+        self._data['value'].append(value)
@@ -30,6 +30,7 @@
 from sagemaker.session import Session
 from sagemaker.session import s3_input
 from sagemaker.utils import base_name_from_image, name_from_base, get_config_value
+from sagemaker.analytics import TrainingJobAnalytics
 
 
 class EstimatorBase(with_metaclass(ABCMeta, object)):
@@ -317,6 +318,14 @@ def delete_endpoint(self):
             raise ValueError('Endpoint was not created yet')
         self.sagemaker_session.delete_endpoint(self.latest_training_job.name)
 
+    @property
+    def training_job_analytics(self):
+        """Returns a TrainingJobAnalytics object for the current training job.
+
+        The metric names are read from the job's description and the analytics
+        object shares this estimator's ``sagemaker_session``.
+
+        Raises:
+            ValueError: if the estimator is not associated with a training job.
+        """
+        if self._current_job_name is None:
+            raise ValueError('Estimator is not associated with a TrainingJob')
+        session = self.sagemaker_session
+        description = session.sagemaker_client.describe_training_job(TrainingJobName=self._current_job_name)
+        # NOTE(review): assumes the response carries AlgorithmSpecification.MetricDefinitions
+        # as a list of dicts with a 'Name' key -- confirm against the API reference.
+        metric_definitions = description.get('AlgorithmSpecification', {}).get('MetricDefinitions', [])
+        metric_names = [definition['Name'] for definition in metric_definitions]
+        # TrainingJobAnalytics needs the metric names as well as the job name.
+        return TrainingJobAnalytics(self._current_job_name, metric_names, sagemaker_session=session)
+
 
 class _TrainingJob(_Job):
     def __init__(self, sagemaker_session, training_job_name):
 
@@ -15,6 +15,7 @@
 import inspect
 import json
 
+from sagemaker.analytics import HyperparameterTuningJobAnalytics
 from sagemaker.estimator import Framework
 from sagemaker.job import _Job
 from sagemaker.utils import base_name_from_image, name_from_base
@@ -201,6 +202,19 @@ def hyperparameter_ranges(self):
             hyperparameter_ranges[range_type + 'ParameterRanges'] = parameter_ranges
         return hyperparameter_ranges
 
+    @property
+    def sagemaker_session(self):
+        """Convenience accessor for the session object of this tuner's estimator.
+
+        Returns:
+            The ``sagemaker_session`` attribute of ``self.estimator``.
+        """
+        estimator = self.estimator
+        return estimator.sagemaker_session
+
+    def analytics(self):
+        """An instance of HyperparameterTuningJobAnalytics for the latest tuning job of this tuner.
+        The analytics object gives you access to tuning results summarized into a pandas dataframe.
+        """
+        # The analytics class expects the tuning job's name, not the job object itself.
+        return HyperparameterTuningJobAnalytics(self.latest_tuning_job.name, self.sagemaker_session)
+
     def _validate_parameter_ranges(self):
         from sagemaker.amazon.hyperparameter import Hyperparameter as hp  # noqa
 
@@ -262,7 +276,7 @@ def start_new(cls, tuner, inputs):
                                                resource_config=(config['resource_config']),
                                                stop_condition=(config['stop_condition']))
 
-        return cls(tuner.estimator.sagemaker_session, tuning_job_name)
+        return cls(tuner.sagemaker_session, tuning_job_name)
 
     def stop(self):
         self.sagemaker_session.stop_tuning_job(HyperParameterTuningJobName=self.name)
-Original file line number
+Diff line change
 venv/
 *~
 .pytest_cache/
 +*.swp