diff --git a/src/sagemaker/amazon/amazon_estimator.py b/src/sagemaker/amazon/amazon_estimator.py
index c0fdebd565..b156f2e65f 100644
--- a/src/sagemaker/amazon/amazon_estimator.py
+++ b/src/sagemaker/amazon/amazon_estimator.py
@@ -16,7 +16,7 @@
 import json
 import logging
 import tempfile
-from typing import Union
+from typing import Union, Optional, Dict
 
 from six.moves.urllib.parse import urlparse
 
@@ -30,6 +30,7 @@
 from sagemaker.utils import sagemaker_timestamp
 from sagemaker.workflow.entities import PipelineVariable
 from sagemaker.workflow.pipeline_context import runnable_by_pipeline
+from sagemaker.workflow import is_pipeline_variable
 
 logger = logging.getLogger(__name__)
 
@@ -40,18 +41,20 @@ class AmazonAlgorithmEstimatorBase(EstimatorBase):
     This class isn't intended to be instantiated directly.
     """
 
-    feature_dim = hp("feature_dim", validation.gt(0), data_type=int)
-    mini_batch_size = hp("mini_batch_size", validation.gt(0), data_type=int)
-    repo_name = None
-    repo_version = None
+    feature_dim: hp = hp("feature_dim", validation.gt(0), data_type=int)
+    mini_batch_size: hp = hp("mini_batch_size", validation.gt(0), data_type=int)
+    repo_name: Optional[str] = None
+    repo_version: Optional[str] = None
+
+    DEFAULT_MINI_BATCH_SIZE: Optional[int] = None
 
     def __init__(
         self,
-        role,
-        instance_count=None,
-        instance_type=None,
-        data_location=None,
-        enable_network_isolation=False,
+        role: str,
+        instance_count: Optional[Union[int, PipelineVariable]] = None,
+        instance_type: Optional[Union[str, PipelineVariable]] = None,
+        data_location: Optional[str] = None,
+        enable_network_isolation: Union[bool, PipelineVariable] = False,
         **kwargs
     ):
         """Initialize an AmazonAlgorithmEstimatorBase.
@@ -62,16 +65,16 @@ def __init__(
                 endpoints use this role to access training data and model
                 artifacts. After the endpoint is created, the inference code
                 might use the IAM role, if it needs to access an AWS resource.
-            instance_count (int): Number of Amazon EC2 instances to use
+            instance_count (int or PipelineVariable): Number of Amazon EC2 instances to use
                 for training. Required.
-            instance_type (str): Type of EC2 instance to use for training,
+            instance_type (str or PipelineVariable): Type of EC2 instance to use for training,
                 for example, 'ml.c4.xlarge'. Required.
             data_location (str or None): The s3 prefix to upload RecordSet
                 objects to, expressed as an S3 url. For example
                 "s3://example-bucket/some-key-prefix/". Objects will be saved in
                 a unique sub-directory of the specified location. If None, a
                 default data location will be used.
-            enable_network_isolation (bool): Specifies whether container will
+            enable_network_isolation (bool or PipelineVariable): Specifies whether container will
                 run in network isolation mode. Network isolation mode restricts
                 the container access to outside networks (such as the internet).
                 Also known as internet-free mode (default: ``False``).
@@ -113,8 +116,14 @@ def data_location(self):
         return self._data_location
 
     @data_location.setter
-    def data_location(self, data_location):
+    def data_location(self, data_location: str):
         """Placeholder docstring"""
+        if is_pipeline_variable(data_location):
+            raise TypeError(
+                "Invalid input: data_location should be a plain string "
+                "rather than a pipeline variable - ({}).".format(type(data_location))
+            )
+
         if not data_location.startswith("s3://"):
             raise ValueError(
                 'Expecting an S3 URL beginning with "s3://". Got "{}"'.format(data_location)
@@ -198,12 +207,12 @@ def _prepare_for_training(self, records, mini_batch_size=None, job_name=None):
     @runnable_by_pipeline
     def fit(
         self,
-        records,
-        mini_batch_size=None,
-        wait=True,
-        logs=True,
-        job_name=None,
-        experiment_config=None,
+        records: "RecordSet",
+        mini_batch_size: Optional[int] = None,
+        wait: bool = True,
+        logs: bool = True,
+        job_name: Optional[str] = None,
+        experiment_config: Optional[Dict[str, str]] = None,
     ):
         """Fit this Estimator on serialized Record objects, stored in S3.
@@ -301,6 +310,20 @@ def record_set(self, train, labels=None, channel="train", encrypt=False):
             channel=channel,
         )
 
+    def _get_default_mini_batch_size(self, num_records: int):
+        """Generate the default mini_batch_size"""
+        if is_pipeline_variable(self.instance_count):
+            logger.warning(
+                "mini_batch_size is not given in .fit() and instance_count is a "
+                "pipeline variable (%s), which is only interpreted at pipeline execution time. "
+                "Thus setting mini_batch_size to 1, since it can't be greater than "
+                "the number of records per instance; otherwise the training job fails.",
+                type(self.instance_count),
+            )
+            return 1
+
+        return min(self.DEFAULT_MINI_BATCH_SIZE, max(1, int(num_records / self.instance_count)))
+
 
 class RecordSet(object):
     """Placeholder docstring"""
@@ -461,7 +484,7 @@ def upload_numpy_to_s3_shards(
         raise ex
 
 
-def get_image_uri(region_name, repo_name, repo_version=1):
+def get_image_uri(region_name, repo_name, repo_version="1"):
    """Deprecated method. Please use sagemaker.image_uris.retrieve().
 
     Args:
diff --git a/src/sagemaker/amazon/factorization_machines.py b/src/sagemaker/amazon/factorization_machines.py
index 927f3d4ebf..4d01897dbe 100644
--- a/src/sagemaker/amazon/factorization_machines.py
+++ b/src/sagemaker/amazon/factorization_machines.py
@@ -37,83 +37,83 @@ class FactorizationMachines(AmazonAlgorithmEstimatorBase):
     sparse datasets economically.
     """
 
-    repo_name = "factorization-machines"
-    repo_version = 1
+    repo_name: str = "factorization-machines"
+    repo_version: str = "1"
 
-    num_factors = hp("num_factors", gt(0), "An integer greater than zero", int)
-    predictor_type = hp(
+    num_factors: hp = hp("num_factors", gt(0), "An integer greater than zero", int)
+    predictor_type: hp = hp(
         "predictor_type",
         isin("binary_classifier", "regressor"),
         'Value "binary_classifier" or "regressor"',
         str,
     )
-    epochs = hp("epochs", gt(0), "An integer greater than 0", int)
-    clip_gradient = hp("clip_gradient", (), "A float value", float)
-    eps = hp("eps", (), "A float value", float)
-    rescale_grad = hp("rescale_grad", (), "A float value", float)
-    bias_lr = hp("bias_lr", ge(0), "A non-negative float", float)
-    linear_lr = hp("linear_lr", ge(0), "A non-negative float", float)
-    factors_lr = hp("factors_lr", ge(0), "A non-negative float", float)
-    bias_wd = hp("bias_wd", ge(0), "A non-negative float", float)
-    linear_wd = hp("linear_wd", ge(0), "A non-negative float", float)
-    factors_wd = hp("factors_wd", ge(0), "A non-negative float", float)
-    bias_init_method = hp(
+    epochs: hp = hp("epochs", gt(0), "An integer greater than 0", int)
+    clip_gradient: hp = hp("clip_gradient", (), "A float value", float)
+    eps: hp = hp("eps", (), "A float value", float)
+    rescale_grad: hp = hp("rescale_grad", (), "A float value", float)
+    bias_lr: hp = hp("bias_lr", ge(0), "A non-negative float", float)
+    linear_lr: hp = hp("linear_lr", ge(0), "A non-negative float", float)
+    factors_lr: hp = hp("factors_lr", ge(0), "A non-negative float", float)
+    bias_wd: hp = hp("bias_wd", ge(0), "A non-negative float", float)
+    linear_wd: hp = hp("linear_wd", ge(0), "A non-negative float", float)
+    factors_wd: hp = hp("factors_wd", ge(0), "A non-negative float", float)
+    bias_init_method: hp = hp(
         "bias_init_method",
         isin("normal", "uniform", "constant"),
         'Value "normal", "uniform" or "constant"',
         str,
     )
-    bias_init_scale = hp("bias_init_scale", ge(0), "A non-negative float", float)
-    bias_init_sigma = hp("bias_init_sigma", ge(0), "A non-negative float", float)
-    bias_init_value = hp("bias_init_value", (), "A float value", float)
-    linear_init_method = hp(
+    bias_init_scale: hp = hp("bias_init_scale", ge(0), "A non-negative float", float)
+    bias_init_sigma: hp = hp("bias_init_sigma", ge(0), "A non-negative float", float)
+    bias_init_value: hp = hp("bias_init_value", (), "A float value", float)
+    linear_init_method: hp = hp(
         "linear_init_method",
         isin("normal", "uniform", "constant"),
         'Value "normal", "uniform" or "constant"',
         str,
     )
-    linear_init_scale = hp("linear_init_scale", ge(0), "A non-negative float", float)
-    linear_init_sigma = hp("linear_init_sigma", ge(0), "A non-negative float", float)
-    linear_init_value = hp("linear_init_value", (), "A float value", float)
-    factors_init_method = hp(
+    linear_init_scale: hp = hp("linear_init_scale", ge(0), "A non-negative float", float)
+    linear_init_sigma: hp = hp("linear_init_sigma", ge(0), "A non-negative float", float)
+    linear_init_value: hp = hp("linear_init_value", (), "A float value", float)
+    factors_init_method: hp = hp(
         "factors_init_method",
         isin("normal", "uniform", "constant"),
         'Value "normal", "uniform" or "constant"',
         str,
     )
-    factors_init_scale = hp("factors_init_scale", ge(0), "A non-negative float", float)
-    factors_init_sigma = hp("factors_init_sigma", ge(0), "A non-negative float", float)
-    factors_init_value = hp("factors_init_value", (), "A float value", float)
+    factors_init_scale: hp = hp("factors_init_scale", ge(0), "A non-negative float", float)
+    factors_init_sigma: hp = hp("factors_init_sigma", ge(0), "A non-negative float", float)
+    factors_init_value: hp = hp("factors_init_value", (), "A float value", float)
 
     def __init__(
         self,
-        role,
-        instance_count=None,
-        instance_type=None,
-        num_factors=None,
-        predictor_type=None,
-        epochs=None,
-        clip_gradient=None,
-        eps=None,
-        rescale_grad=None,
-        bias_lr=None,
-        linear_lr=None,
-        factors_lr=None,
-        bias_wd=None,
-        linear_wd=None,
-        factors_wd=None,
-        bias_init_method=None,
-        bias_init_scale=None,
-        bias_init_sigma=None,
-        bias_init_value=None,
-        linear_init_method=None,
-        linear_init_scale=None,
-        linear_init_sigma=None,
-        linear_init_value=None,
-        factors_init_method=None,
-        factors_init_scale=None,
-        factors_init_sigma=None,
-        factors_init_value=None,
+        role: str,
+        instance_count: Optional[Union[int, PipelineVariable]] = None,
+        instance_type: Optional[Union[str, PipelineVariable]] = None,
+        num_factors: Optional[int] = None,
+        predictor_type: Optional[str] = None,
+        epochs: Optional[int] = None,
+        clip_gradient: Optional[float] = None,
+        eps: Optional[float] = None,
+        rescale_grad: Optional[float] = None,
+        bias_lr: Optional[float] = None,
+        linear_lr: Optional[float] = None,
+        factors_lr: Optional[float] = None,
+        bias_wd: Optional[float] = None,
+        linear_wd: Optional[float] = None,
+        factors_wd: Optional[float] = None,
+        bias_init_method: Optional[str] = None,
+        bias_init_scale: Optional[float] = None,
+        bias_init_sigma: Optional[float] = None,
+        bias_init_value: Optional[float] = None,
+        linear_init_method: Optional[str] = None,
+        linear_init_scale: Optional[float] = None,
+        linear_init_sigma: Optional[float] = None,
+        linear_init_value: Optional[float] = None,
+        factors_init_method: Optional[str] = None,
+        factors_init_scale: Optional[float] = None,
+        factors_init_sigma: Optional[float] = None,
+        factors_init_value: Optional[float] = None,
         **kwargs
     ):
         """Factorization Machines is :class:`Estimator` for general-purpose supervised learning.
@@ -160,9 +160,9 @@ def __init__(
                 endpoints use this role to access training data and model
                 artifacts. After the endpoint is created, the inference code
                 might use the IAM role, if accessing AWS resource.
-            instance_count (int): Number of Amazon EC2 instances to use
+            instance_count (int or PipelineVariable): Number of Amazon EC2 instances to use
                 for training.
-            instance_type (str): Type of EC2 instance to use for training,
+            instance_type (str or PipelineVariable): Type of EC2 instance to use for training,
                 for example, 'ml.c4.xlarge'.
             num_factors (int): Dimensionality of factorization.
             predictor_type (str): Type of predictor 'binary_classifier' or
@@ -183,7 +183,7 @@ def __init__(
             linear_wd (float): Non-negative weight decay for linear terms.
             factors_wd (float): Non-negative weight decay for factorization
                 terms.
-            bias_init_method (string): Initialization method for the bias term:
+            bias_init_method (str): Initialization method for the bias term:
                 'normal', 'uniform' or 'constant'.
             bias_init_scale (float): Non-negative range for initialization of
                 the bias term that takes effect when bias_init_method parameter
@@ -193,7 +193,7 @@ def __init__(
                 bias_init_method parameter is 'normal'.
             bias_init_value (float): Initial value of the bias term that takes
                 effect when bias_init_method parameter is 'constant'.
-            linear_init_method (string): Initialization method for linear term:
+            linear_init_method (str): Initialization method for linear term:
                 'normal', 'uniform' or 'constant'.
             linear_init_scale (float): Non-negative range for initialization of
                 linear terms that takes effect when linear_init_method parameter
@@ -203,7 +203,7 @@ def __init__(
                 linear_init_method parameter is 'normal'.
             linear_init_value (float): Initial value of linear terms that takes
                 effect when linear_init_method parameter is 'constant'.
-            factors_init_method (string): Initialization method for
+            factors_init_method (str): Initialization method for
                 factorization term: 'normal', 'uniform' or 'constant'.
             factors_init_scale (float): Non-negative range for initialization
                 of factorization terms that takes effect when factors_init_method
diff --git a/src/sagemaker/amazon/ipinsights.py b/src/sagemaker/amazon/ipinsights.py
index a73853ad75..a3562f8434 100644
--- a/src/sagemaker/amazon/ipinsights.py
+++ b/src/sagemaker/amazon/ipinsights.py
@@ -36,45 +36,45 @@ class IPInsights(AmazonAlgorithmEstimatorBase):
     as user IDs or account numbers.
     """
 
-    repo_name = "ipinsights"
-    repo_version = 1
-    MINI_BATCH_SIZE = 10000
+    repo_name: str = "ipinsights"
+    repo_version: str = "1"
+    MINI_BATCH_SIZE: int = 10000
 
-    num_entity_vectors = hp(
+    num_entity_vectors: hp = hp(
         "num_entity_vectors", (ge(1), le(250000000)), "An integer in [1, 250000000]", int
     )
-    vector_dim = hp("vector_dim", (ge(4), le(4096)), "An integer in [4, 4096]", int)
+    vector_dim: hp = hp("vector_dim", (ge(4), le(4096)), "An integer in [4, 4096]", int)
 
-    batch_metrics_publish_interval = hp(
+    batch_metrics_publish_interval: hp = hp(
         "batch_metrics_publish_interval", (ge(1)), "An integer greater than 0", int
     )
-    epochs = hp("epochs", (ge(1)), "An integer greater than 0", int)
-    learning_rate = hp("learning_rate", (ge(1e-6), le(10.0)), "A float in [1e-6, 10.0]", float)
-    num_ip_encoder_layers = hp(
+    epochs: hp = hp("epochs", (ge(1)), "An integer greater than 0", int)
+    learning_rate: hp = hp("learning_rate", (ge(1e-6), le(10.0)), "A float in [1e-6, 10.0]", float)
+    num_ip_encoder_layers: hp = hp(
         "num_ip_encoder_layers", (ge(0), le(100)), "An integer in [0, 100]", int
     )
-    random_negative_sampling_rate = hp(
+    random_negative_sampling_rate: hp = hp(
         "random_negative_sampling_rate", (ge(0), le(500)), "An integer in [0, 500]", int
     )
-    shuffled_negative_sampling_rate = hp(
+    shuffled_negative_sampling_rate: hp = hp(
         "shuffled_negative_sampling_rate", (ge(0), le(500)), "An integer in [0, 500]", int
     )
-    weight_decay = hp("weight_decay", (ge(0.0), le(10.0)), "A float in [0.0, 10.0]", float)
+    weight_decay: hp = hp("weight_decay", (ge(0.0), le(10.0)), "A float in [0.0, 10.0]", float)
 
     def __init__(
         self,
-        role,
-        instance_count=None,
-        instance_type=None,
-        num_entity_vectors=None,
-        vector_dim=None,
-        batch_metrics_publish_interval=None,
-        epochs=None,
-        learning_rate=None,
-        num_ip_encoder_layers=None,
-        random_negative_sampling_rate=None,
-        shuffled_negative_sampling_rate=None,
-        weight_decay=None,
+        role: str,
+        instance_count: Optional[Union[int, PipelineVariable]] = None,
+        instance_type: Optional[Union[str, PipelineVariable]] = None,
+        num_entity_vectors: Optional[int] = None,
+        vector_dim: Optional[int] = None,
+        batch_metrics_publish_interval: Optional[int] = None,
+        epochs: Optional[int] = None,
+        learning_rate: Optional[float] = None,
+        num_ip_encoder_layers: Optional[int] = None,
+        random_negative_sampling_rate: Optional[int] = None,
+        shuffled_negative_sampling_rate: Optional[int] = None,
+        weight_decay: Optional[float] = None,
         **kwargs
     ):
         """This estimator is for IP Insights.
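Taken together, the widened `Union[..., PipelineVariable]` annotations above are what let SageMaker Pipelines parameters flow into these typed constructors. A minimal usage sketch under that assumption follows; the role ARN, parameter names, and hyperparameter values are illustrative placeholders, not part of this diff:

    from sagemaker.amazon.ipinsights import IPInsights
    from sagemaker.workflow.parameters import ParameterInteger, ParameterString

    # Pipeline parameters are PipelineVariable instances; they resolve to
    # concrete values only at pipeline execution time.
    instance_count = ParameterInteger(name="TrainInstanceCount", default_value=1)
    instance_type = ParameterString(name="TrainInstanceType", default_value="ml.m5.xlarge")

    estimator = IPInsights(
        role="arn:aws:iam::123456789012:role/ExampleSageMakerRole",  # placeholder
        instance_count=instance_count,  # Optional[Union[int, PipelineVariable]]
        instance_type=instance_type,    # Optional[Union[str, PipelineVariable]]
        num_entity_vectors=10000,
        vector_dim=128,
    )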
@@ -106,9 +106,9 @@ def __init__(
                 endpoints use this role to access training data and model
                 artifacts. After the endpoint is created, the inference code
                 might use the IAM role, if accessing AWS resource.
-            instance_count (int): Number of Amazon EC2 instances to use
+            instance_count (int or PipelineVariable): Number of Amazon EC2 instances to use
                 for training.
-            instance_type (str): Type of EC2 instance to use for training,
+            instance_type (str or PipelineVariable): Type of EC2 instance to use for training,
                 for example, 'ml.m5.xlarge'.
             num_entity_vectors (int): Required. The number of embeddings to
                 train for entities accessing online resources. We recommend 2x
diff --git a/src/sagemaker/amazon/kmeans.py b/src/sagemaker/amazon/kmeans.py
index 964a4dfde4..a6a9a918a7 100644
--- a/src/sagemaker/amazon/kmeans.py
+++ b/src/sagemaker/amazon/kmeans.py
@@ -13,7 +13,7 @@
 """Placeholder docstring"""
 from __future__ import absolute_import
 
-from typing import Union, Optional
+from typing import Union, Optional, List
 
 from sagemaker import image_uris
 from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase
@@ -36,23 +36,25 @@ class KMeans(AmazonAlgorithmEstimatorBase):
     the algorithm to use to determine similarity.
     """
 
-    repo_name = "kmeans"
-    repo_version = 1
+    repo_name: str = "kmeans"
+    repo_version: str = "1"
 
-    k = hp("k", gt(1), "An integer greater-than 1", int)
-    init_method = hp("init_method", isin("random", "kmeans++"), 'One of "random", "kmeans++"', str)
-    max_iterations = hp("local_lloyd_max_iter", gt(0), "An integer greater-than 0", int)
-    tol = hp("local_lloyd_tol", (ge(0), le(1)), "An float in [0, 1]", float)
-    num_trials = hp("local_lloyd_num_trials", gt(0), "An integer greater-than 0", int)
-    local_init_method = hp(
+    k: hp = hp("k", gt(1), "An integer greater-than 1", int)
+    init_method: hp = hp(
+        "init_method", isin("random", "kmeans++"), 'One of "random", "kmeans++"', str
+    )
+    max_iterations: hp = hp("local_lloyd_max_iter", gt(0), "An integer greater-than 0", int)
+    tol: hp = hp("local_lloyd_tol", (ge(0), le(1)), "A float in [0, 1]", float)
+    num_trials: hp = hp("local_lloyd_num_trials", gt(0), "An integer greater-than 0", int)
+    local_init_method: hp = hp(
         "local_lloyd_init_method", isin("random", "kmeans++"), 'One of "random", "kmeans++"', str
     )
-    half_life_time_size = hp(
+    half_life_time_size: hp = hp(
         "half_life_time_size", ge(0), "An integer greater-than-or-equal-to 0", int
     )
-    epochs = hp("epochs", gt(0), "An integer greater-than 0", int)
-    center_factor = hp("extra_center_factor", gt(0), "An integer greater-than 0", int)
-    eval_metrics = hp(
+    epochs: hp = hp("epochs", gt(0), "An integer greater-than 0", int)
+    center_factor: hp = hp("extra_center_factor", gt(0), "An integer greater-than 0", int)
+    eval_metrics: hp = hp(
         name="eval_metrics",
         validation_message='A comma separated list of "msd" or "ssd"',
         data_type=list,
@@ -60,19 +62,19 @@ class KMeans(AmazonAlgorithmEstimatorBase):
 
     def __init__(
         self,
-        role,
-        instance_count=None,
-        instance_type=None,
-        k=None,
-        init_method=None,
-        max_iterations=None,
-        tol=None,
-        num_trials=None,
-        local_init_method=None,
-        half_life_time_size=None,
-        epochs=None,
-        center_factor=None,
-        eval_metrics=None,
+        role: str,
+        instance_count: Optional[Union[int, PipelineVariable]] = None,
+        instance_type: Optional[Union[str, PipelineVariable]] = None,
+        k: Optional[int] = None,
+        init_method: Optional[str] = None,
+        max_iterations: Optional[int] = None,
+        tol: Optional[float] = None,
+        num_trials: Optional[int] = None,
+        local_init_method: Optional[str] = None,
+        half_life_time_size: Optional[int] = None,
+        epochs: Optional[int] = None,
+        center_factor: Optional[int] = None,
+        eval_metrics: Optional[List[Union[str, PipelineVariable]]] = None,
         **kwargs
     ):
         """A k-means clustering class :class:`~sagemaker.amazon.AmazonAlgorithmEstimatorBase`.
@@ -113,9 +115,9 @@ def __init__(
                 endpoints use this role to access training data and model
                 artifacts. After the endpoint is created, the inference code
                 might use the IAM role, if accessing AWS resource.
-            instance_count (int): Number of Amazon EC2 instances to use
+            instance_count (int or PipelineVariable): Number of Amazon EC2 instances to use
                 for training.
-            instance_type (str): Type of EC2 instance to use for training,
+            instance_type (str or PipelineVariable): Type of EC2 instance to use for training,
                 for example, 'ml.c4.xlarge'.
             k (int): The number of clusters to produce.
             init_method (str): How to initialize cluster locations. One of
@@ -139,8 +141,8 @@ def __init__(
             center_factor (int): The algorithm will create
                 ``num_clusters * extra_center_factor`` as it runs and reduce
                 the number of centers to ``k`` when finalizing
-            eval_metrics (list): JSON list of metrics types to be used for
-                reporting the score for the model. Allowed values are "msd"
+            eval_metrics (list[str] or list[PipelineVariable]): JSON list of metrics types
+                to be used for reporting the score for the model. Allowed values are "msd"
                 Means Square Error, "ssd": Sum of square distance. If test data
                 is provided, the score shall be reported in terms of all
                 requested metrics.
diff --git a/src/sagemaker/amazon/knn.py b/src/sagemaker/amazon/knn.py
index a621d794fd..3ea63f1587 100644
--- a/src/sagemaker/amazon/knn.py
+++ b/src/sagemaker/amazon/knn.py
@@ -37,54 +37,54 @@ class KNN(AmazonAlgorithmEstimatorBase):
     the average of their feature values as the predicted value.
     """
 
-    repo_name = "knn"
-    repo_version = 1
+    repo_name: str = "knn"
+    repo_version: str = "1"
 
-    k = hp("k", (ge(1)), "An integer greater than 0", int)
-    sample_size = hp("sample_size", (ge(1)), "An integer greater than 0", int)
-    predictor_type = hp(
+    k: hp = hp("k", (ge(1)), "An integer greater than 0", int)
+    sample_size: hp = hp("sample_size", (ge(1)), "An integer greater than 0", int)
+    predictor_type: hp = hp(
         "predictor_type", isin("classifier", "regressor"), 'One of "classifier" or "regressor"', str
     )
-    dimension_reduction_target = hp(
+    dimension_reduction_target: hp = hp(
         "dimension_reduction_target",
         (ge(1)),
         "An integer greater than 0 and less than feature_dim",
         int,
     )
-    dimension_reduction_type = hp(
+    dimension_reduction_type: hp = hp(
         "dimension_reduction_type", isin("sign", "fjlt"), 'One of "sign" or "fjlt"', str
     )
-    index_metric = hp(
+    index_metric: hp = hp(
         "index_metric",
         isin("COSINE", "INNER_PRODUCT", "L2"),
         'One of "COSINE", "INNER_PRODUCT", "L2"',
         str,
     )
-    index_type = hp(
+    index_type: hp = hp(
         "index_type",
         isin("faiss.Flat", "faiss.IVFFlat", "faiss.IVFPQ"),
         'One of "faiss.Flat", "faiss.IVFFlat", "faiss.IVFPQ"',
         str,
     )
-    faiss_index_ivf_nlists = hp(
+    faiss_index_ivf_nlists: hp = hp(
         "faiss_index_ivf_nlists", (), '"auto" or an integer greater than 0', str
     )
-    faiss_index_pq_m = hp("faiss_index_pq_m", (ge(1)), "An integer greater than 0", int)
+    faiss_index_pq_m: hp = hp("faiss_index_pq_m", (ge(1)), "An integer greater than 0", int)
 
     def __init__(
         self,
-        role,
-        instance_count=None,
-        instance_type=None,
-        k=None,
-        sample_size=None,
-        predictor_type=None,
-        dimension_reduction_type=None,
-        dimension_reduction_target=None,
-        index_type=None,
-        index_metric=None,
-        faiss_index_ivf_nlists=None,
-        faiss_index_pq_m=None,
+        role: str,
+        instance_count: Optional[Union[int, PipelineVariable]] = None,
+        instance_type: Optional[Union[str, PipelineVariable]] = None,
+        k: Optional[int] = None,
+        sample_size: Optional[int] = None,
+        predictor_type: Optional[str] = None,
+        dimension_reduction_type: Optional[str] = None,
+        dimension_reduction_target: Optional[int] = None,
+        index_type: Optional[str] = None,
+        index_metric: Optional[str] = None,
+        faiss_index_ivf_nlists: Optional[str] = None,
+        faiss_index_pq_m: Optional[int] = None,
         **kwargs
     ):
         """k-nearest neighbors (KNN) is :class:`Estimator` used for classification and regression.
@@ -117,8 +117,9 @@ def __init__(
                 endpoints use this role to access training data and model
                 artifacts. After the endpoint is created, the inference code
                 might use the IAM role, if accessing AWS resource.
-            instance_count:
-            instance_type (str): Type of EC2 instance to use for training,
+            instance_count (int or PipelineVariable): Number of Amazon EC2 instances to use
+                for training.
+            instance_type (str or PipelineVariable): Type of EC2 instance to use for training,
                 for example, 'ml.c4.xlarge'.
             k (int): Required. Number of nearest neighbors.
             sample_size (int): Required. Number of data points to be sampled
diff --git a/src/sagemaker/amazon/lda.py b/src/sagemaker/amazon/lda.py
index fc1de1cea6..cb65d1f82e 100644
--- a/src/sagemaker/amazon/lda.py
+++ b/src/sagemaker/amazon/lda.py
@@ -13,6 +13,7 @@
 """Placeholder docstring"""
 from __future__ import absolute_import
 
+import logging
 from typing import Union, Optional
 
 from sagemaker import image_uris
@@ -26,6 +27,9 @@
 from sagemaker.utils import pop_out_unused_kwarg
 from sagemaker.vpc_utils import VPC_CONFIG_DEFAULT
 from sagemaker.workflow.entities import PipelineVariable
+from sagemaker.workflow import is_pipeline_variable
+
+logger = logging.getLogger(__name__)
 
 
 class LDA(AmazonAlgorithmEstimatorBase):
@@ -37,24 +41,24 @@ class LDA(AmazonAlgorithmEstimatorBase):
     word, and the categories are the topics.
     """
 
-    repo_name = "lda"
-    repo_version = 1
+    repo_name: str = "lda"
+    repo_version: str = "1"
 
-    num_topics = hp("num_topics", gt(0), "An integer greater than zero", int)
-    alpha0 = hp("alpha0", gt(0), "A positive float", float)
-    max_restarts = hp("max_restarts", gt(0), "An integer greater than zero", int)
-    max_iterations = hp("max_iterations", gt(0), "An integer greater than zero", int)
-    tol = hp("tol", gt(0), "A positive float", float)
+    num_topics: hp = hp("num_topics", gt(0), "An integer greater than zero", int)
+    alpha0: hp = hp("alpha0", gt(0), "A positive float", float)
+    max_restarts: hp = hp("max_restarts", gt(0), "An integer greater than zero", int)
+    max_iterations: hp = hp("max_iterations", gt(0), "An integer greater than zero", int)
+    tol: hp = hp("tol", gt(0), "A positive float", float)
 
     def __init__(
         self,
-        role,
-        instance_type=None,
-        num_topics=None,
-        alpha0=None,
-        max_restarts=None,
-        max_iterations=None,
-        tol=None,
+        role: str,
+        instance_type: Optional[Union[str, PipelineVariable]] = None,
+        num_topics: Optional[int] = None,
+        alpha0: Optional[float] = None,
+        max_restarts: Optional[int] = None,
+        max_iterations: Optional[int] = None,
+        tol: Optional[float] = None,
         **kwargs
     ):
         """Latent Dirichlet Allocation (LDA) is :class:`Estimator` used for unsupervised learning.
@@ -102,7 +106,7 @@ def __init__(
                 endpoints use this role to access training data and model
                 artifacts. After the endpoint is created, the inference code
                 might use the IAM role, if accessing AWS resource.
-            instance_type (str): Type of EC2 instance to use for training,
+            instance_type (str or PipelineVariable): Type of EC2 instance to use for training,
                 for example, 'ml.c4.xlarge'.
             num_topics (int): The number of topics for LDA to find within the
                 data.
@@ -124,11 +128,10 @@ def __init__(
                 :class:`~sagemaker.estimator.EstimatorBase`.
         """
         # this algorithm only supports single instance training
-        if kwargs.pop("instance_count", 1) != 1:
-            print(
-                "LDA only supports single instance training. Defaulting to 1 {}.".format(
-                    instance_type
-                )
+        instance_count = kwargs.pop("instance_count", 1)
+        if is_pipeline_variable(instance_count) or instance_count != 1:
+            logger.warning(
+                "LDA only supports single instance training. Defaulting to 1 %s.", instance_type
             )
 
         super(LDA, self).__init__(role, 1, instance_type, **kwargs)
diff --git a/src/sagemaker/amazon/linear_learner.py b/src/sagemaker/amazon/linear_learner.py
index 0b36b117dd..53663e9fec 100644
--- a/src/sagemaker/amazon/linear_learner.py
+++ b/src/sagemaker/amazon/linear_learner.py
@@ -13,6 +13,7 @@
 """Placeholder docstring"""
 from __future__ import absolute_import
 
+import logging
 from typing import Union, Optional
 
 from sagemaker import image_uris
@@ -27,6 +28,8 @@
 from sagemaker.vpc_utils import VPC_CONFIG_DEFAULT
 from sagemaker.workflow.entities import PipelineVariable
 
+logger = logging.getLogger(__name__)
+
 
 class LinearLearner(AmazonAlgorithmEstimatorBase):
     """A supervised learning algorithms used for solving classification or regression problems.
@@ -39,12 +42,12 @@ class LinearLearner(AmazonAlgorithmEstimatorBase):
     of the label y.
     """
 
-    repo_name = "linear-learner"
-    repo_version = 1
+    repo_name: str = "linear-learner"
+    repo_version: str = "1"
 
-    DEFAULT_MINI_BATCH_SIZE = 1000
+    DEFAULT_MINI_BATCH_SIZE: int = 1000
 
-    binary_classifier_model_selection_criteria = hp(
+    binary_classifier_model_selection_criteria: hp = hp(
         "binary_classifier_model_selection_criteria",
         isin(
             "accuracy",
@@ -57,32 +60,36 @@ class LinearLearner(AmazonAlgorithmEstimatorBase):
         ),
         data_type=str,
     )
-    target_recall = hp("target_recall", (gt(0), lt(1)), "A float in (0,1)", float)
-    target_precision = hp("target_precision", (gt(0), lt(1)), "A float in (0,1)", float)
-    positive_example_weight_mult = hp(
+    target_recall: hp = hp("target_recall", (gt(0), lt(1)), "A float in (0,1)", float)
+    target_precision: hp = hp("target_precision", (gt(0), lt(1)), "A float in (0,1)", float)
+    positive_example_weight_mult: hp = hp(
         "positive_example_weight_mult", (), "A float greater than 0 or 'auto' or 'balanced'", str
     )
-    epochs = hp("epochs", gt(0), "An integer greater-than 0", int)
-    predictor_type = hp(
+    epochs: hp = hp("epochs", gt(0), "An integer greater-than 0", int)
+    predictor_type: hp = hp(
         "predictor_type",
         isin("binary_classifier", "regressor", "multiclass_classifier"),
         'One of "binary_classifier" or "multiclass_classifier" or "regressor"',
         str,
     )
-    use_bias = hp("use_bias", (), "Either True or False", bool)
-    num_models = hp("num_models", gt(0), "An integer greater-than 0", int)
-    num_calibration_samples = hp("num_calibration_samples", gt(0), "An integer greater-than 0", int)
-    init_method = hp("init_method", isin("uniform", "normal"), 'One of "uniform" or "normal"', str)
-    init_scale = hp("init_scale", gt(0), "A float greater-than 0", float)
-    init_sigma = hp("init_sigma", gt(0), "A float greater-than 0", float)
-    init_bias = hp("init_bias", (), "A number", float)
-    optimizer = hp(
+    use_bias: hp = hp("use_bias", (), "Either True or False", bool)
+    num_models: hp = hp("num_models", gt(0), "An integer greater-than 0", int)
+    num_calibration_samples: hp = hp(
+        "num_calibration_samples", gt(0), "An integer greater-than 0", int
+    )
+    init_method: hp = hp(
+        "init_method", isin("uniform", "normal"), 'One of "uniform" or "normal"', str
+    )
+    init_scale: hp = hp("init_scale", gt(0), "A float greater-than 0", float)
+    init_sigma: hp = hp("init_sigma", gt(0), "A float greater-than 0", float)
+    init_bias: hp = hp("init_bias", (), "A number", float)
+    optimizer: hp = hp(
         "optimizer",
         isin("sgd", "adam", "rmsprop", "auto"),
         'One of "sgd", "adam", "rmsprop" or "auto',
         str,
     )
-    loss = hp(
+    loss: hp = hp(
         "loss",
         isin(
             "logistic",
@@ -100,83 +107,89 @@ class LinearLearner(AmazonAlgorithmEstimatorBase):
         ' "eps_insensitive_absolute_loss", "quantile_loss", "huber_loss", "softmax_loss" or "auto"',
         str,
     )
-    wd = hp("wd", ge(0), "A float greater-than or equal to 0", float)
-    l1 = hp("l1", ge(0), "A float greater-than or equal to 0", float)
-    momentum = hp("momentum", (ge(0), lt(1)), "A float in [0,1)", float)
-    learning_rate = hp("learning_rate", gt(0), "A float greater-than 0", float)
-    beta_1 = hp("beta_1", (ge(0), lt(1)), "A float in [0,1)", float)
-    beta_2 = hp("beta_2", (ge(0), lt(1)), "A float in [0,1)", float)
-    bias_lr_mult = hp("bias_lr_mult", gt(0), "A float greater-than 0", float)
-    bias_wd_mult = hp("bias_wd_mult", ge(0), "A float greater-than or equal to 0", float)
-    use_lr_scheduler = hp("use_lr_scheduler", (), "A boolean", bool)
-    lr_scheduler_step = hp("lr_scheduler_step", gt(0), "An integer greater-than 0", int)
-    lr_scheduler_factor = hp("lr_scheduler_factor", (gt(0), lt(1)), "A float in (0,1)", float)
-    lr_scheduler_minimum_lr = hp("lr_scheduler_minimum_lr", gt(0), "A float greater-than 0", float)
-    normalize_data = hp("normalize_data", (), "A boolean", bool)
-    normalize_label = hp("normalize_label", (), "A boolean", bool)
-    unbias_data = hp("unbias_data", (), "A boolean", bool)
-    unbias_label = hp("unbias_label", (), "A boolean", bool)
-    num_point_for_scaler = hp("num_point_for_scaler", gt(0), "An integer greater-than 0", int)
-    margin = hp("margin", ge(0), "A float greater-than or equal to 0", float)
-    quantile = hp("quantile", (gt(0), lt(1)), "A float in (0,1)", float)
-    loss_insensitivity = hp("loss_insensitivity", gt(0), "A float greater-than 0", float)
-    huber_delta = hp("huber_delta", ge(0), "A float greater-than or equal to 0", float)
-    early_stopping_patience = hp("early_stopping_patience", gt(0), "An integer greater-than 0", int)
-    early_stopping_tolerance = hp(
+    wd: hp = hp("wd", ge(0), "A float greater-than or equal to 0", float)
+    l1: hp = hp("l1", ge(0), "A float greater-than or equal to 0", float)
+    momentum: hp = hp("momentum", (ge(0), lt(1)), "A float in [0,1)", float)
+    learning_rate: hp = hp("learning_rate", gt(0), "A float greater-than 0", float)
+    beta_1: hp = hp("beta_1", (ge(0), lt(1)), "A float in [0,1)", float)
+    beta_2: hp = hp("beta_2", (ge(0), lt(1)), "A float in [0,1)", float)
+    bias_lr_mult: hp = hp("bias_lr_mult", gt(0), "A float greater-than 0", float)
+    bias_wd_mult: hp = hp("bias_wd_mult", ge(0), "A float greater-than or equal to 0", float)
+    use_lr_scheduler: hp = hp("use_lr_scheduler", (), "A boolean", bool)
+    lr_scheduler_step: hp = hp("lr_scheduler_step", gt(0), "An integer greater-than 0", int)
+    lr_scheduler_factor: hp = hp("lr_scheduler_factor", (gt(0), lt(1)), "A float in (0,1)", float)
+    lr_scheduler_minimum_lr: hp = hp(
+        "lr_scheduler_minimum_lr", gt(0), "A float greater-than 0", float
+    )
+    normalize_data: hp = hp("normalize_data", (), "A boolean", bool)
+    normalize_label: hp = hp("normalize_label", (), "A boolean", bool)
+    unbias_data: hp = hp("unbias_data", (), "A boolean", bool)
+    unbias_label: hp = hp("unbias_label", (), "A boolean", bool)
+    num_point_for_scaler: hp = hp("num_point_for_scaler", gt(0), "An integer greater-than 0", int)
+    margin: hp = hp("margin", ge(0), "A float greater-than or equal to 0", float)
+    quantile: hp = hp("quantile", (gt(0), lt(1)), "A float in (0,1)", float)
+    loss_insensitivity: hp = hp("loss_insensitivity", gt(0), "A float greater-than 0", float)
+    huber_delta: hp = hp("huber_delta", ge(0), "A float greater-than or equal to 0", float)
+    early_stopping_patience: hp = hp(
+        "early_stopping_patience", gt(0), "An integer greater-than 0", int
+    )
+    early_stopping_tolerance: hp = hp(
         "early_stopping_tolerance", gt(0), "A float greater-than 0", float
     )
-    num_classes = hp("num_classes", (gt(0), le(1000000)), "An integer in [1,1000000]", int)
-    accuracy_top_k = hp("accuracy_top_k", (gt(0), le(1000000)), "An integer in [1,1000000]", int)
-    f_beta = hp("f_beta", gt(0), "A float greater-than 0", float)
-    balance_multiclass_weights = hp("balance_multiclass_weights", (), "A boolean", bool)
+    num_classes: hp = hp("num_classes", (gt(0), le(1000000)), "An integer in [1,1000000]", int)
+    accuracy_top_k: hp = hp(
+        "accuracy_top_k", (gt(0), le(1000000)), "An integer in [1,1000000]", int
+    )
+    f_beta: hp = hp("f_beta", gt(0), "A float greater-than 0", float)
+    balance_multiclass_weights: hp = hp("balance_multiclass_weights", (), "A boolean", bool)
 
     def __init__(
         self,
-        role,
-        instance_count=None,
-        instance_type=None,
-        predictor_type=None,
-        binary_classifier_model_selection_criteria=None,
-        target_recall=None,
-        target_precision=None,
-        positive_example_weight_mult=None,
-        epochs=None,
-        use_bias=None,
-        num_models=None,
-        num_calibration_samples=None,
-        init_method=None,
-        init_scale=None,
-        init_sigma=None,
-        init_bias=None,
-        optimizer=None,
-        loss=None,
-        wd=None,
-        l1=None,
-        momentum=None,
-        learning_rate=None,
-        beta_1=None,
-        beta_2=None,
-        bias_lr_mult=None,
-        bias_wd_mult=None,
-        use_lr_scheduler=None,
-        lr_scheduler_step=None,
-        lr_scheduler_factor=None,
-        lr_scheduler_minimum_lr=None,
-        normalize_data=None,
-        normalize_label=None,
-        unbias_data=None,
-        unbias_label=None,
-        num_point_for_scaler=None,
-        margin=None,
-        quantile=None,
-        loss_insensitivity=None,
-        huber_delta=None,
-        early_stopping_patience=None,
-        early_stopping_tolerance=None,
-        num_classes=None,
-        accuracy_top_k=None,
-        f_beta=None,
-        balance_multiclass_weights=None,
+        role: str,
+        instance_count: Optional[Union[int, PipelineVariable]] = None,
+        instance_type: Optional[Union[str, PipelineVariable]] = None,
+        predictor_type: Optional[str] = None,
+        binary_classifier_model_selection_criteria: Optional[str] = None,
+        target_recall: Optional[float] = None,
+        target_precision: Optional[float] = None,
+        positive_example_weight_mult: Optional[float] = None,
+        epochs: Optional[int] = None,
+        use_bias: Optional[bool] = None,
+        num_models: Optional[int] = None,
+        num_calibration_samples: Optional[int] = None,
+        init_method: Optional[str] = None,
+        init_scale: Optional[float] = None,
+        init_sigma: Optional[float] = None,
+        init_bias: Optional[float] = None,
+        optimizer: Optional[str] = None,
+        loss: Optional[str] = None,
+        wd: Optional[float] = None,
+        l1: Optional[float] = None,
+        momentum: Optional[float] = None,
+        learning_rate: Optional[float] = None,
+        beta_1: Optional[float] = None,
+        beta_2: Optional[float] = None,
+        bias_lr_mult: Optional[float] = None,
+        bias_wd_mult: Optional[float] = None,
+        use_lr_scheduler: Optional[bool] = None,
+        lr_scheduler_step: Optional[int] = None,
+        lr_scheduler_factor: Optional[float] = None,
+        lr_scheduler_minimum_lr: Optional[float] = None,
+        normalize_data: Optional[bool] = None,
+        normalize_label: Optional[bool] = None,
+        unbias_data: Optional[bool] = None,
+        unbias_label: Optional[bool] = None,
+        num_point_for_scaler: Optional[int] = None,
+        margin: Optional[float] = None,
+        quantile: Optional[float] = None,
+        loss_insensitivity: Optional[float] = None,
+        huber_delta: Optional[float] = None,
+        early_stopping_patience: Optional[int] = None,
+        early_stopping_tolerance: Optional[float] = None,
+        num_classes: Optional[int] = None,
+        accuracy_top_k: Optional[int] = None,
+        f_beta: Optional[float] = None,
+        balance_multiclass_weights: Optional[bool] = None,
         **kwargs
     ):
         """An :class:`Estimator` for binary classification and regression.
@@ -227,9 +240,9 @@ def __init__(
                 endpoints use this role to access training data and model
                 artifacts. After the endpoint is created, the inference code
                 might use the IAM role, if accessing AWS resource.
-            instance_count (int): Number of Amazon EC2 instances to use
+            instance_count (int or PipelineVariable): Number of Amazon EC2 instances to use
                 for training.
-            instance_type (str): Type of EC2 instance to use for training,
+            instance_type (str or PipelineVariable): Type of EC2 instance to use for training,
                 for example, 'ml.c4.xlarge'.
             predictor_type (str): The type of predictor to learn. Either
                 "binary_classifier" or "multiclass_classifier" or "regressor".
@@ -309,21 +322,21 @@ def __init__(
                 metric evaluation, compute L2 loss for errors smaller than delta
                 and L1 loss for errors larger than delta.
             early_stopping_patience (int): the number of epochs to wait before ending training
-            if no improvement is made. The improvement is training loss if validation data is
-            not provided, or else it is the validation loss or the binary classification model
-            selection criteria like accuracy, f1-score etc. To disable early stopping,
-            set early_stopping_patience to a value larger than epochs.
+                if no improvement is made. The improvement is training loss if validation data is
+                not provided, or else it is the validation loss or the binary classification model
+                selection criteria like accuracy, f1-score etc. To disable early stopping,
+                set early_stopping_patience to a value larger than epochs.
             early_stopping_tolerance (float): Relative tolerance to measure an
                 improvement in loss. If the ratio of the improvement in loss
                 divided by the previous best loss is smaller than this value,
-            early stopping will consider the improvement to be zero.
+                early stopping will consider the improvement to be zero.
             num_classes (int): The number of classes for the response variable.
                 Required when predictor_type is multiclass_classifier and
                 ignored otherwise. The classes are assumed to be labeled
                 0, ..., num_classes - 1.
             accuracy_top_k (int): The value of k when computing the Top K
                 Accuracy metric for multiclass classification. An example is
                 scored as correct if the model assigns one of the top k scores
-            to the true label.
+                to the true label.
             f_beta (float): The value of beta to use when calculating F score
                 metrics for binary or multiclass classification. Also used if
                 binary_classifier_model_selection_criteria is f_beta.
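The `_prepare_for_training` hunks below replace per-algorithm copies of the default mini-batch computation with the shared `_get_default_mini_batch_size` helper added in amazon_estimator.py. A standalone sketch of the default it computes when `instance_count` is a concrete integer (the numbers are illustrative):

    DEFAULT_MINI_BATCH_SIZE = 1000  # LinearLearner's class constant

    def default_mini_batch_size(num_records: int, instance_count: int) -> int:
        # The default is capped both by the class constant and by the number
        # of records per instance; a larger value would fail the training job.
        return min(DEFAULT_MINI_BATCH_SIZE, max(1, num_records // instance_count))

    assert default_mini_batch_size(10_000, 4) == 1000  # capped by the constant
    assert default_mini_batch_size(1_200, 4) == 300    # capped by records per instance
    assert default_mini_batch_size(3, 4) == 1          # never drops below 1

When `instance_count` is a pipeline variable, the records-per-instance bound cannot be evaluated at compile time, so the helper logs a warning and falls back to 1, the only value guaranteed not to exceed it.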
@@ -424,10 +437,8 @@ def _prepare_for_training(self, records, mini_batch_size=None, job_name=None):
         num_records = records.num_records
 
         # mini_batch_size can't be greater than number of records or training job fails
-        default_mini_batch_size = min(
-            self.DEFAULT_MINI_BATCH_SIZE, max(1, int(num_records / self.instance_count))
-        )
-        mini_batch_size = mini_batch_size or default_mini_batch_size
+        mini_batch_size = mini_batch_size or self._get_default_mini_batch_size(num_records)
+
         super(LinearLearner, self)._prepare_for_training(
             records, mini_batch_size=mini_batch_size, job_name=job_name
         )
diff --git a/src/sagemaker/amazon/ntm.py b/src/sagemaker/amazon/ntm.py
index ddcc619ada..f43980eac8 100644
--- a/src/sagemaker/amazon/ntm.py
+++ b/src/sagemaker/amazon/ntm.py
@@ -13,7 +13,7 @@
 """Placeholder docstring"""
 from __future__ import absolute_import
 
-from typing import Union, Optional
+from typing import Optional, Union, List
 
 from sagemaker import image_uris
 from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase
@@ -36,53 +36,59 @@ class NTM(AmazonAlgorithmEstimatorBase):
     "mileage", and "speed" are likely to share a topic on "transportation"
     for example.
     """
 
-    repo_name = "ntm"
-    repo_version = 1
+    repo_name: str = "ntm"
+    repo_version: str = "1"
 
-    num_topics = hp("num_topics", (ge(2), le(1000)), "An integer in [2, 1000]", int)
-    encoder_layers = hp(
+    num_topics: hp = hp("num_topics", (ge(2), le(1000)), "An integer in [2, 1000]", int)
+    encoder_layers: hp = hp(
         name="encoder_layers",
         validation_message="A comma separated list of " "positive integers",
         data_type=list,
     )
-    epochs = hp("epochs", (ge(1), le(100)), "An integer in [1, 100]", int)
-    encoder_layers_activation = hp(
+    epochs: hp = hp("epochs", (ge(1), le(100)), "An integer in [1, 100]", int)
+    encoder_layers_activation: hp = hp(
         "encoder_layers_activation",
         isin("sigmoid", "tanh", "relu"),
         'One of "sigmoid", "tanh" or "relu"',
         str,
     )
-    optimizer = hp(
+    optimizer: hp = hp(
         "optimizer",
         isin("adagrad", "adam", "rmsprop", "sgd", "adadelta"),
         'One of "adagrad", "adam", "rmsprop", "sgd" and "adadelta"',
         str,
     )
-    tolerance = hp("tolerance", (ge(1e-6), le(0.1)), "A float in [1e-6, 0.1]", float)
-    num_patience_epochs = hp("num_patience_epochs", (ge(1), le(10)), "An integer in [1, 10]", int)
-    batch_norm = hp(name="batch_norm", validation_message="Value must be a boolean", data_type=bool)
-    rescale_gradient = hp("rescale_gradient", (ge(1e-3), le(1.0)), "A float in [1e-3, 1.0]", float)
-    clip_gradient = hp("clip_gradient", ge(1e-3), "A float greater equal to 1e-3", float)
-    weight_decay = hp("weight_decay", (ge(0.0), le(1.0)), "A float in [0.0, 1.0]", float)
-    learning_rate = hp("learning_rate", (ge(1e-6), le(1.0)), "A float in [1e-6, 1.0]", float)
+    tolerance: hp = hp("tolerance", (ge(1e-6), le(0.1)), "A float in [1e-6, 0.1]", float)
+    num_patience_epochs: hp = hp(
+        "num_patience_epochs", (ge(1), le(10)), "An integer in [1, 10]", int
+    )
+    batch_norm: hp = hp(
+        name="batch_norm", validation_message="Value must be a boolean", data_type=bool
+    )
+    rescale_gradient: hp = hp(
+        "rescale_gradient", (ge(1e-3), le(1.0)), "A float in [1e-3, 1.0]", float
+    )
+    clip_gradient: hp = hp("clip_gradient", ge(1e-3), "A float greater equal to 1e-3", float)
+    weight_decay: hp = hp("weight_decay", (ge(0.0), le(1.0)), "A float in [0.0, 1.0]", float)
+    learning_rate: hp = hp("learning_rate", (ge(1e-6), le(1.0)), "A float in [1e-6, 1.0]", float)
 
     def __init__(
         self,
-        role,
-        instance_count=None,
-        instance_type=None,
-        num_topics=None,
-        encoder_layers=None,
-        epochs=None,
-        encoder_layers_activation=None,
-        optimizer=None,
-        tolerance=None,
-        num_patience_epochs=None,
-        batch_norm=None,
-        rescale_gradient=None,
-        clip_gradient=None,
-        weight_decay=None,
-        learning_rate=None,
+        role: str,
+        instance_count: Optional[Union[int, PipelineVariable]] = None,
+        instance_type: Optional[Union[str, PipelineVariable]] = None,
+        num_topics: Optional[int] = None,
+        encoder_layers: Optional[List] = None,
+        epochs: Optional[int] = None,
+        encoder_layers_activation: Optional[str] = None,
+        optimizer: Optional[str] = None,
+        tolerance: Optional[float] = None,
+        num_patience_epochs: Optional[int] = None,
+        batch_norm: Optional[bool] = None,
+        rescale_gradient: Optional[float] = None,
+        clip_gradient: Optional[float] = None,
+        weight_decay: Optional[float] = None,
+        learning_rate: Optional[float] = None,
         **kwargs
     ):
         """Neural Topic Model (NTM) is :class:`Estimator` used for unsupervised learning.
@@ -122,8 +128,9 @@ def __init__(
                 endpoints use this role to access training data and model
                 artifacts. After the endpoint is created, the inference code
                 might use the IAM role, if accessing AWS resource.
-            instance_count:
-            instance_type (str): Type of EC2 instance to use for training,
+            instance_count (int or PipelineVariable): Number of Amazon EC2 instances to use
+                for training.
+            instance_type (str or PipelineVariable): Type of EC2 instance to use for training,
                 for example, 'ml.c4.xlarge'.
             num_topics (int): Required. The number of topics for NTM to find
                 within the data.
diff --git a/src/sagemaker/amazon/object2vec.py b/src/sagemaker/amazon/object2vec.py
index 6177c742ba..efc1105fd7 100644
--- a/src/sagemaker/amazon/object2vec.py
+++ b/src/sagemaker/amazon/object2vec.py
@@ -53,132 +53,142 @@ class Object2Vec(AmazonAlgorithmEstimatorBase):
     objects in the original space in the embedding space.
     """
 
-    repo_name = "object2vec"
-    repo_version = 1
-    MINI_BATCH_SIZE = 32
-
-    enc_dim = hp("enc_dim", (ge(4), le(10000)), "An integer in [4, 10000]", int)
-    mini_batch_size = hp("mini_batch_size", (ge(1), le(10000)), "An integer in [1, 10000]", int)
-    epochs = hp("epochs", (ge(1), le(100)), "An integer in [1, 100]", int)
-    early_stopping_patience = hp(
+    repo_name: str = "object2vec"
+    repo_version: str = "1"
+    MINI_BATCH_SIZE: int = 32
+
+    enc_dim: hp = hp("enc_dim", (ge(4), le(10000)), "An integer in [4, 10000]", int)
+    mini_batch_size: hp = hp("mini_batch_size", (ge(1), le(10000)), "An integer in [1, 10000]", int)
+    epochs: hp = hp("epochs", (ge(1), le(100)), "An integer in [1, 100]", int)
+    early_stopping_patience: hp = hp(
         "early_stopping_patience", (ge(1), le(5)), "An integer in [1, 5]", int
     )
-    early_stopping_tolerance = hp(
+    early_stopping_tolerance: hp = hp(
         "early_stopping_tolerance", (ge(1e-06), le(0.1)), "A float in [1e-06, 0.1]", float
     )
-    dropout = hp("dropout", (ge(0.0), le(1.0)), "A float in [0.0, 1.0]", float)
-    weight_decay = hp("weight_decay", (ge(0.0), le(10000.0)), "A float in [0.0, 10000.0]", float)
-    bucket_width = hp("bucket_width", (ge(0), le(100)), "An integer in [0, 100]", int)
-    num_classes = hp("num_classes", (ge(2), le(30)), "An integer in [2, 30]", int)
-    mlp_layers = hp("mlp_layers", (ge(1), le(10)), "An integer in [1, 10]", int)
-    mlp_dim = hp("mlp_dim", (ge(2), le(10000)), "An integer in [2, 10000]", int)
-    mlp_activation = hp(
+    dropout: hp = hp("dropout", (ge(0.0), le(1.0)), "A float in [0.0, 1.0]", float)
+    weight_decay: hp = hp(
+        "weight_decay", (ge(0.0), le(10000.0)), "A float in [0.0, 10000.0]", float
+    )
+    bucket_width: hp = hp("bucket_width", (ge(0), le(100)), "An integer in [0, 100]", int)
+    num_classes: hp = hp("num_classes", (ge(2), le(30)), "An integer in [2, 30]", int)
+    mlp_layers: hp = hp("mlp_layers", (ge(1), le(10)), "An integer in [1, 10]", int)
+    mlp_dim: hp = hp("mlp_dim", (ge(2), le(10000)), "An integer in [2, 10000]", int)
+    mlp_activation: hp = hp(
         "mlp_activation", isin("tanh", "relu", "linear"), 'One of "tanh", "relu", "linear"', str
     )
-    output_layer = hp(
+    output_layer: hp = hp(
         "output_layer",
         isin("softmax", "mean_squared_error"),
         'One of "softmax", "mean_squared_error"',
         str,
     )
-    optimizer = hp(
+    optimizer: hp = hp(
         "optimizer",
         isin("adagrad", "adam", "rmsprop", "sgd", "adadelta"),
         'One of "adagrad", "adam", "rmsprop", "sgd", "adadelta"',
         str,
     )
-    learning_rate = hp("learning_rate", (ge(1e-06), le(1.0)), "A float in [1e-06, 1.0]", float)
+    learning_rate: hp = hp("learning_rate", (ge(1e-06), le(1.0)), "A float in [1e-06, 1.0]", float)
 
-    negative_sampling_rate = hp(
+    negative_sampling_rate: hp = hp(
         "negative_sampling_rate", (ge(0), le(100)), "An integer in [0, 100]", int
     )
-    comparator_list = hp(
+    comparator_list: hp = hp(
         "comparator_list",
         _list_check_subset(["hadamard", "concat", "abs_diff"]),
         'Comma-separated of hadamard, concat, abs_diff. E.g. "hadamard,abs_diff"',
         str,
     )
-    tied_token_embedding_weight = hp(
+    tied_token_embedding_weight: hp = hp(
         "tied_token_embedding_weight", (), "Either True or False", bool
     )
-    token_embedding_storage_type = hp(
+    token_embedding_storage_type: hp = hp(
         "token_embedding_storage_type",
         isin("dense", "row_sparse"),
         'One of "dense", "row_sparse"',
         str,
     )
-    enc0_network = hp(
+    enc0_network: hp = hp(
         "enc0_network",
         isin("hcnn", "bilstm", "pooled_embedding"),
         'One of "hcnn", "bilstm", "pooled_embedding"',
         str,
     )
-    enc1_network = hp(
+    enc1_network: hp = hp(
         "enc1_network",
         isin("hcnn", "bilstm", "pooled_embedding", "enc0"),
         'One of "hcnn", "bilstm", "pooled_embedding", "enc0"',
         str,
     )
-    enc0_cnn_filter_width = hp("enc0_cnn_filter_width", (ge(1), le(9)), "An integer in [1, 9]", int)
-    enc1_cnn_filter_width = hp("enc1_cnn_filter_width", (ge(1), le(9)), "An integer in [1, 9]", int)
-    enc0_max_seq_len = hp("enc0_max_seq_len", (ge(1), le(5000)), "An integer in [1, 5000]", int)
-    enc1_max_seq_len = hp("enc1_max_seq_len", (ge(1), le(5000)), "An integer in [1, 5000]", int)
-    enc0_token_embedding_dim = hp(
+    enc0_cnn_filter_width: hp = hp(
+        "enc0_cnn_filter_width", (ge(1), le(9)), "An integer in [1, 9]", int
+    )
+    enc1_cnn_filter_width: hp = hp(
+        "enc1_cnn_filter_width", (ge(1), le(9)), "An integer in [1, 9]", int
+    )
+    enc0_max_seq_len: hp = hp("enc0_max_seq_len", (ge(1), le(5000)), "An integer in [1, 5000]", int)
+    enc1_max_seq_len: hp = hp("enc1_max_seq_len", (ge(1), le(5000)), "An integer in [1, 5000]", int)
+    enc0_token_embedding_dim: hp = hp(
         "enc0_token_embedding_dim", (ge(2), le(1000)), "An integer in [2, 1000]", int
     )
-    enc1_token_embedding_dim = hp(
+    enc1_token_embedding_dim: hp = hp(
         "enc1_token_embedding_dim", (ge(2), le(1000)), "An integer in [2, 1000]", int
     )
-    enc0_vocab_size = hp("enc0_vocab_size", (ge(2), le(3000000)), "An integer in [2, 3000000]", int)
-    enc1_vocab_size = hp("enc1_vocab_size", (ge(2), le(3000000)), "An integer in [2, 3000000]", int)
-    enc0_layers = hp("enc0_layers", (ge(1), le(4)), "An integer in [1, 4]", int)
-    enc1_layers = hp("enc1_layers", (ge(1), le(4)), "An integer in [1, 4]", int)
-    enc0_freeze_pretrained_embedding = hp(
+    enc0_vocab_size: hp = hp(
+        "enc0_vocab_size", (ge(2), le(3000000)), "An integer in [2, 3000000]", int
+    )
+    enc1_vocab_size: hp = hp(
+        "enc1_vocab_size", (ge(2), le(3000000)), "An integer in [2, 3000000]", int
+    )
+    enc0_layers: hp = hp("enc0_layers", (ge(1), le(4)), "An integer in [1, 4]", int)
+    enc1_layers: hp = hp("enc1_layers", (ge(1), le(4)), "An integer in [1, 4]", int)
+    enc0_freeze_pretrained_embedding: hp = hp(
         "enc0_freeze_pretrained_embedding", (), "Either True or False", bool
     )
-    enc1_freeze_pretrained_embedding = hp(
+    enc1_freeze_pretrained_embedding: hp = hp(
         "enc1_freeze_pretrained_embedding", (), "Either True or False", bool
     )
 
     def __init__(
         self,
-        role,
-        instance_count=None,
-        instance_type=None,
-        epochs=None,
-        enc0_max_seq_len=None,
-        enc0_vocab_size=None,
-        enc_dim=None,
-        mini_batch_size=None,
-        early_stopping_patience=None,
-        early_stopping_tolerance=None,
-        dropout=None,
-        weight_decay=None,
-        bucket_width=None,
-        num_classes=None,
-        mlp_layers=None,
-        mlp_dim=None,
-        mlp_activation=None,
-        output_layer=None,
-        optimizer=None,
-        learning_rate=None,
-        negative_sampling_rate=None,
-        comparator_list=None,
-        tied_token_embedding_weight=None,
-        token_embedding_storage_type=None,
-        enc0_network=None,
-        enc1_network=None,
-        enc0_cnn_filter_width=None,
-        enc1_cnn_filter_width=None,
-        enc1_max_seq_len=None,
-        enc0_token_embedding_dim=None,
-        enc1_token_embedding_dim=None,
-        enc1_vocab_size=None,
-        enc0_layers=None,
-        enc1_layers=None,
-        enc0_freeze_pretrained_embedding=None,
-        enc1_freeze_pretrained_embedding=None,
+        role: str,
+        instance_count: Optional[Union[int, PipelineVariable]] = None,
+        instance_type: Optional[Union[str, PipelineVariable]] = None,
+        epochs: Optional[int] = None,
+        enc0_max_seq_len: Optional[int] = None,
+        enc0_vocab_size: Optional[int] = None,
+        enc_dim: Optional[int] = None,
+        mini_batch_size: Optional[int] = None,
+        early_stopping_patience: Optional[int] = None,
+        early_stopping_tolerance: Optional[float] = None,
+        dropout: Optional[float] = None,
+        weight_decay: Optional[float] = None,
+        bucket_width: Optional[int] = None,
+        num_classes: Optional[int] = None,
+        mlp_layers: Optional[int] = None,
+        mlp_dim: Optional[int] = None,
+        mlp_activation: Optional[str] = None,
+        output_layer: Optional[str] = None,
+        optimizer: Optional[str] = None,
+        learning_rate: Optional[float] = None,
+        negative_sampling_rate: Optional[int] = None,
+        comparator_list: Optional[str] = None,
+        tied_token_embedding_weight: Optional[bool] = None,
+        token_embedding_storage_type: Optional[str] = None,
+        enc0_network: Optional[str] = None,
+        enc1_network: Optional[str] = None,
+        enc0_cnn_filter_width: Optional[int] = None,
+        enc1_cnn_filter_width: Optional[int] = None,
+        enc1_max_seq_len: Optional[int] = None,
+        enc0_token_embedding_dim: Optional[int] = None,
+        enc1_token_embedding_dim: Optional[int] = None,
+        enc1_vocab_size: Optional[int] = None,
+        enc0_layers: Optional[int] = None,
+        enc1_layers: Optional[int] = None,
+        enc0_freeze_pretrained_embedding: Optional[bool] = None,
+        enc1_freeze_pretrained_embedding: Optional[bool] = None,
         **kwargs
     ):
         """Object2Vec is :class:`Estimator` used for anomaly detection.
@@ -212,9 +222,9 @@ def __init__(
                 endpoints use this role to access training data and model
                 artifacts. After the endpoint is created, the inference code
                 might use the IAM role, if accessing AWS resource.
-            instance_count (int): Number of Amazon EC2 instances to use
+            instance_count (int or PipelineVariable): Number of Amazon EC2 instances to use
                 for training.
-            instance_type (str): Type of EC2 instance to use for training,
+            instance_type (str or PipelineVariable): Type of EC2 instance to use for training,
                 for example, 'ml.c4.xlarge'.
             epochs (int): Total number of epochs for SGD training
             enc0_max_seq_len (int): Maximum sequence length
diff --git a/src/sagemaker/amazon/pca.py b/src/sagemaker/amazon/pca.py
index 6236d20bef..7c0fec94de 100644
--- a/src/sagemaker/amazon/pca.py
+++ b/src/sagemaker/amazon/pca.py
@@ -35,22 +35,24 @@ class PCA(AmazonAlgorithmEstimatorBase):
     retain as much information as possible.
     """
 
-    repo_name = "pca"
-    repo_version = 1
+    repo_name: str = "pca"
+    repo_version: str = "1"
 
-    DEFAULT_MINI_BATCH_SIZE = 500
+    DEFAULT_MINI_BATCH_SIZE: int = 500
 
-    num_components = hp("num_components", gt(0), "Value must be an integer greater than zero", int)
-    algorithm_mode = hp(
+    num_components: hp = hp(
+        "num_components", gt(0), "Value must be an integer greater than zero", int
+    )
+    algorithm_mode: hp = hp(
         "algorithm_mode",
         isin("regular", "randomized"),
         'Value must be one of "regular" and "randomized"',
         str,
     )
-    subtract_mean = hp(
+    subtract_mean: hp = hp(
         name="subtract_mean", validation_message="Value must be a boolean", data_type=bool
     )
-    extra_components = hp(
+    extra_components: hp = hp(
         name="extra_components",
         validation_message="Value must be an integer greater than or equal to 0, or -1.",
         data_type=int,
@@ -58,13 +60,13 @@ class PCA(AmazonAlgorithmEstimatorBase):
 
     def __init__(
         self,
-        role,
-        instance_count=None,
-        instance_type=None,
-        num_components=None,
-        algorithm_mode=None,
-        subtract_mean=None,
-        extra_components=None,
+        role: str,
+        instance_count: Optional[Union[int, PipelineVariable]] = None,
+        instance_type: Optional[Union[str, PipelineVariable]] = None,
+        num_components: Optional[int] = None,
+        algorithm_mode: Optional[str] = None,
+        subtract_mean: Optional[bool] = None,
+        extra_components: Optional[int] = None,
         **kwargs
     ):
         """A Principal Components Analysis (PCA)
@@ -107,9 +109,9 @@ def __init__(
                 endpoints use this role to access training data and model
                 artifacts. After the endpoint is created, the inference code
                 might use the IAM role, if accessing AWS resource.
-            instance_count (int): Number of Amazon EC2 instances to use
+            instance_count (int or PipelineVariable): Number of Amazon EC2 instances to use
                 for training.
-            instance_type (str): Type of EC2 instance to use for training,
+            instance_type (str or PipelineVariable): Type of EC2 instance to use for training,
                 for example, 'ml.c4.xlarge'.
             num_components (int): The number of principal components. Must be
                 greater than zero.
@@ -179,10 +181,7 @@ def _prepare_for_training(self, records, mini_batch_size=None, job_name=None):
         num_records = records.num_records
 
         # mini_batch_size is a required parameter
-        default_mini_batch_size = min(
-            self.DEFAULT_MINI_BATCH_SIZE, max(1, int(num_records / self.instance_count))
-        )
-        use_mini_batch_size = mini_batch_size or default_mini_batch_size
+        use_mini_batch_size = mini_batch_size or self._get_default_mini_batch_size(num_records)
 
         super(PCA, self)._prepare_for_training(
             records=records, mini_batch_size=use_mini_batch_size, job_name=job_name
diff --git a/src/sagemaker/amazon/randomcutforest.py b/src/sagemaker/amazon/randomcutforest.py
index d690b1825e..5fb708b91b 100644
--- a/src/sagemaker/amazon/randomcutforest.py
+++ b/src/sagemaker/amazon/randomcutforest.py
@@ -13,7 +13,7 @@
 """Placeholder docstring"""
 from __future__ import absolute_import
 
-from typing import Optional, Union
+from typing import Optional, Union, List
 
 from sagemaker import image_uris
 from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase
@@ -36,30 +36,30 @@ class RandomCutForest(AmazonAlgorithmEstimatorBase):
     or unclassifiable data points.
     """
 
-    repo_name = "randomcutforest"
-    repo_version = 1
-    MINI_BATCH_SIZE = 1000
+    repo_name: str = "randomcutforest"
+    repo_version: str = "1"
+    MINI_BATCH_SIZE: int = 1000
 
-    eval_metrics = hp(
+    eval_metrics: hp = hp(
         name="eval_metrics",
         validation_message='A comma separated list of "accuracy" or "precision_recall_fscore"',
         data_type=list,
     )
 
-    num_trees = hp("num_trees", (ge(50), le(1000)), "An integer in [50, 1000]", int)
-    num_samples_per_tree = hp(
+    num_trees: hp = hp("num_trees", (ge(50), le(1000)), "An integer in [50, 1000]", int)
+    num_samples_per_tree: hp = hp(
         "num_samples_per_tree", (ge(1), le(2048)), "An integer in [1, 2048]", int
     )
-    feature_dim = hp("feature_dim", (ge(1), le(10000)), "An integer in [1, 10000]", int)
+    feature_dim: hp = hp("feature_dim", (ge(1), le(10000)), "An integer in [1, 10000]", int)
 
     def __init__(
         self,
-        role,
-        instance_count=None,
-        instance_type=None,
-        num_samples_per_tree=None,
-        num_trees=None,
-        eval_metrics=None,
+        role: str,
+        instance_count: Optional[Union[int, PipelineVariable]] = None,
+        instance_type: Optional[Union[str, PipelineVariable]] = None,
+        num_samples_per_tree: Optional[int] = None,
+        num_trees: Optional[int] = None,
+        eval_metrics: Optional[List] = None,
         **kwargs
     ):
         """An `Estimator` class implementing a Random Cut Forest.
@@ -100,9 +100,9 @@ def __init__(
                 endpoints use this role to access training data and model
                 artifacts. After the endpoint is created, the inference code
                 might use the IAM role, if accessing AWS resource.
-            instance_count (int): Number of Amazon EC2 instances to use
+            instance_count (int or PipelineVariable): Number of Amazon EC2 instances to use
                 for training.
-            instance_type (str): Type of EC2 instance to use for training,
+            instance_type (str or PipelineVariable): Type of EC2 instance to use for training,
                 for example, 'ml.c4.xlarge'.
             num_samples_per_tree (int): Optional. The number of samples used to
                 build each tree in the forest. The total number of samples drawn