
Commit b827da2

Committed Nov 28, 2024
add
1 parent a20782e commit b827da2

File tree

4 files changed: +310 -0 lines changed

‎tencentcloud/services/tke/resource_tc_kubernetes_node_pool.go

Lines changed: 13 additions & 0 deletions
Some generated files are not rendered by default.

‎tencentcloud/services/tke/resource_tc_kubernetes_node_pool.md

Lines changed: 68 additions & 0 deletions
@@ -6,6 +6,8 @@ Provide a resource to create an auto scaling group for kubernetes cluster.

~> **NOTE:** In order to ensure the integrity of customer data, if a CVM instance is destroyed by a scale-in, the CBS disks associated with it are kept by default. If you want them destroyed together with the instance, set `delete_with_instance` to `true`.

~> **NOTE:** The two parameters `wait_node_ready` and `scale_tolerance` give finer control over node pool scaling operations. If they are set and the scaling criteria are not met while the resource is being created, the resource is marked as `tainted`.

Example Usage

```hcl
@@ -143,6 +145,72 @@ resource "tencentcloud_kubernetes_node_pool" "example" {
}
```

Set `wait_node_ready` and `scale_tolerance`

```hcl
resource "tencentcloud_kubernetes_node_pool" "example" {
  name                     = "tf-example"
  cluster_id               = tencentcloud_kubernetes_cluster.managed_cluster.id
  max_size                 = 100
  min_size                 = 1
  vpc_id                   = data.tencentcloud_vpc_subnets.vpc.instance_list.0.vpc_id
  subnet_ids               = [data.tencentcloud_vpc_subnets.vpc.instance_list.0.subnet_id]
  retry_policy             = "INCREMENTAL_INTERVALS"
  desired_capacity         = 50
  enable_auto_scale        = false
  wait_node_ready          = true
  scale_tolerance          = 90
  multi_zone_subnet_policy = "EQUALITY"
  node_os                  = "img-6n21msk1"
  delete_keep_instance     = false

  auto_scaling_config {
    instance_type              = var.default_instance_type
    system_disk_type           = "CLOUD_PREMIUM"
    system_disk_size           = "50"
    orderly_security_group_ids = ["sg-bw28gmso"]

    data_disk {
      disk_type            = "CLOUD_PREMIUM"
      disk_size            = 50
      delete_with_instance = true
    }

    internet_charge_type       = "TRAFFIC_POSTPAID_BY_HOUR"
    internet_max_bandwidth_out = 10
    public_ip_assigned         = true
    password                   = "test123#"
    enhanced_security_service  = false
    enhanced_monitor_service   = false
    host_name                  = "12.123.0.0"
    host_name_style            = "ORIGINAL"
  }

  labels = {
    "test1" = "test1",
    "test2" = "test2",
  }

  taints {
    key    = "test_taint"
    value  = "taint_value"
    effect = "PreferNoSchedule"
  }

  taints {
    key    = "test_taint2"
    value  = "taint_value2"
    effect = "PreferNoSchedule"
  }

  node_config {
    docker_graph_path = "/var/lib/docker"
    extra_args = [
      "root-dir=/var/lib/kubelet"
    ]
  }
}
```

Import

tke node pool can be imported, e.g.

‎tencentcloud/services/tke/resource_tc_kubernetes_node_pool_extension.go

Lines changed: 158 additions & 0 deletions
@@ -6,6 +6,7 @@ import (
    "log"
    "strings"

    "github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common"
    tchttp "github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common/http"

    as "github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/as/v20180419"
@@ -54,6 +55,11 @@ func resourceTencentCloudKubernetesNodePoolCreatePostFillRequest0(ctx context.Co
        return fmt.Errorf("need only one auto_scaling_config")
    }

    // check params
    if err := checkParams(ctx); err != nil {
        return err
    }

    groupParaStr, err := composeParameterToAsScalingGroupParaSerial(d)
    if err != nil {
        return err

@@ -153,6 +159,11 @@ func resourceTencentCloudKubernetesNodePoolCreatePostHandleResponse0(ctx context
        return err
    }

    // wait node scaling
    if err = waitNodePoolInitializing(ctx, clusterId, nodePoolId); err != nil {
        return err
    }

    return nil
}

@@ -556,6 +567,11 @@ func resourceTencentCloudKubernetesNodePoolUpdateOnStart(ctx context.Context) er
    clusterId := items[0]
    nodePoolId := items[1]

    // check params
    if err := checkParams(ctx); err != nil {
        return err
    }

    d.Partial(true)

    nodePool, _, err := service.DescribeNodePool(ctx, clusterId, nodePoolId)

@@ -614,6 +630,11 @@ func resourceTencentCloudKubernetesNodePoolUpdateOnStart(ctx context.Context) er
            return err
        }
        capacityHasChanged = true

        // wait node scaling
        if err = waitNodePoolInitializing(ctx, clusterId, nodePoolId); err != nil {
            return err
        }
    }

    // ModifyClusterNodePool

@@ -697,6 +718,11 @@ func resourceTencentCloudKubernetesNodePoolUpdateOnStart(ctx context.Context) er
        if err != nil {
            return err
        }

        // wait node scaling
        if err = waitNodePoolInitializing(ctx, clusterId, nodePoolId); err != nil {
            return err
        }
    }

    return nil

@@ -1351,3 +1377,135 @@ func resourceTencentCloudKubernetesNodePoolUpdateTaints(ctx context.Context, clu
    }
    return nil
}

func checkParams(ctx context.Context) error {
    d := tccommon.ResourceDataFromContext(ctx)
    var (
        enableAutoscale bool
        waitNodeReady   bool
    )

    if v, ok := d.GetOkExists("enable_auto_scale"); ok {
        enableAutoscale = v.(bool)
    }

    if v, ok := d.GetOkExists("wait_node_ready"); ok {
        waitNodeReady = v.(bool)
    }

    if enableAutoscale && waitNodeReady {
        return fmt.Errorf("`wait_node_ready` only can be set if `enable_auto_scale` is `false`.")
    }

    if _, ok := d.GetOkExists("scale_tolerance"); ok {
        if !waitNodeReady {
            return fmt.Errorf("`scale_tolerance` only can be set if `wait_node_ready` is `true`.")
        }
    }

    return nil
}

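As a quick illustration of the two rules enforced above, here is a minimal standalone sketch (hypothetical values, not part of this commit): `wait_node_ready` is rejected when `enable_auto_scale` is `true`, and `scale_tolerance` is rejected unless `wait_node_ready` is set.

```go
package main

import "fmt"

// validateFlags mirrors the two checks in checkParams: `wait_node_ready`
// requires `enable_auto_scale = false`, and `scale_tolerance` may only be
// supplied together with `wait_node_ready = true`.
func validateFlags(enableAutoScale, waitNodeReady, toleranceSet bool) error {
	if enableAutoScale && waitNodeReady {
		return fmt.Errorf("`wait_node_ready` only can be set if `enable_auto_scale` is `false`")
	}
	if toleranceSet && !waitNodeReady {
		return fmt.Errorf("`scale_tolerance` only can be set if `wait_node_ready` is `true`")
	}
	return nil
}

func main() {
	fmt.Println(validateFlags(false, true, true)) // <nil>: accepted combination
	fmt.Println(validateFlags(true, true, false)) // error: auto scaling and wait_node_ready conflict
}
```
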
func waitNodePoolInitializing(ctx context.Context, clusterId, nodePoolId string) (err error) {
    d := tccommon.ResourceDataFromContext(ctx)
    meta := tccommon.ProviderMetaFromContext(ctx)

    var (
        currentNormal      int64
        desiredCapacity    int64
        waitNodeReady      bool
        scaleTolerance     int64 = 100
        autoscalingGroupId string
    )

    if v, ok := d.GetOkExists("desired_capacity"); ok {
        desiredCapacity = int64(v.(int))
        if desiredCapacity == 0 {
            desiredCapacity = 1
        }
    }

    if v, ok := d.GetOkExists("wait_node_ready"); ok {
        waitNodeReady = v.(bool)
    }

    if waitNodeReady {
        if v, ok := d.GetOkExists("scale_tolerance"); ok {
            scaleTolerance = int64(v.(int))
        }

        logId := tccommon.GetLogId(tccommon.ContextNil)
        nodePoolDetailrequest := tke.NewDescribeClusterNodePoolDetailRequest()
        nodePoolDetailrequest.ClusterId = common.StringPtr(clusterId)
        nodePoolDetailrequest.NodePoolId = common.StringPtr(nodePoolId)
        err = resource.Retry(1*tccommon.ReadRetryTimeout, func() *resource.RetryError {
            result, e := meta.(tccommon.ProviderMeta).GetAPIV3Conn().UseTkeV20180525Client().DescribeClusterNodePoolDetailWithContext(ctx, nodePoolDetailrequest)
            if e != nil {
                return tccommon.RetryError(e)
            } else {
                log.Printf("[DEBUG]%s api[%s] success, request body [%s], response body [%s]\n", logId, nodePoolDetailrequest.GetAction(), nodePoolDetailrequest.ToJsonString(), result.ToJsonString())
            }

            if result == nil || result.Response == nil || result.Response.NodePool == nil || result.Response.NodePool.NodeCountSummary == nil || result.Response.NodePool.NodeCountSummary.AutoscalingAdded == nil {
                e = fmt.Errorf("Cluster %s node pool %s not exists", clusterId, nodePoolId)
                return resource.NonRetryableError(e)
            }

            desiredNodesNum := result.Response.NodePool.DesiredNodesNum
            autoscalingAdded := result.Response.NodePool.NodeCountSummary.AutoscalingAdded
            total := autoscalingAdded.Total
            normal := autoscalingAdded.Normal
            if *total != 0 {
                if *normal > *desiredNodesNum {
                    return resource.RetryableError(fmt.Errorf("Node pool is still scaling"))
                }

                currentTolerance := int64((float64(*normal) / float64(*desiredNodesNum)) * 100)
                if currentTolerance >= scaleTolerance || *desiredNodesNum == *normal {
                    return nil
                }
            }

            currentNormal = *normal
            autoscalingGroupId = *result.Response.NodePool.AutoscalingGroupId
            return resource.RetryableError(fmt.Errorf("Node pool is still scaling"))
        })

        if err != nil {
            if currentNormal < 1 {
                var errFmt string
                asRequest := as.NewDescribeAutoScalingActivitiesRequest()
                asRequest.Filters = []*as.Filter{
                    {
                        Name:   common.StringPtr("auto-scaling-group-id"),
                        Values: common.StringPtrs([]string{autoscalingGroupId}),
                    },
                }

                err = resource.Retry(tccommon.ReadRetryTimeout, func() *resource.RetryError {
                    result, e := meta.(tccommon.ProviderMeta).GetAPIV3Conn().UseAsClient().DescribeAutoScalingActivitiesWithContext(ctx, asRequest)
                    if e != nil {
                        return tccommon.RetryError(e)
                    } else {
                        log.Printf("[DEBUG]%s api[%s] success, request body [%s], response body [%s]\n", logId, asRequest.GetAction(), asRequest.ToJsonString(), result.ToJsonString())
                    }

                    if result == nil || result.Response == nil || result.Response.ActivitySet == nil || len(result.Response.ActivitySet) < 1 {
                        e = fmt.Errorf("Describe auto scaling activities failed")
                        return resource.NonRetryableError(e)
                    }

                    res := result.Response.ActivitySet[0]
                    errFmt = fmt.Sprintf("%s\nDescription: %s\nStatusMessage: %s", *res.StatusMessageSimplified, *res.Description, *res.StatusMessage)
                    return nil
                })

                return fmt.Errorf("Node pool scaling failed, Reason: %s\nPlease check your resource inventory, Or adjust `desired_capacity`, `scale_tolerance` and `instance_type`, Then try again.", errFmt)
            } else {
                return fmt.Errorf("Node pool scaling failed, Desired value: %d, Actual value: %d, Scale tolerance: %d%%\nPlease check your resource inventory, Or adjust `desired_capacity`, `scale_tolerance` and `instance_type`, Then try again.", desiredCapacity, currentNormal, scaleTolerance)
            }
        }
    }

    return nil
}
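To make the success condition concrete, the sketch below (hypothetical numbers, not part of this commit) reproduces the tolerance arithmetic applied inside the retry loop above: with `desired_capacity = 50` and `scale_tolerance = 90`, scaling is treated as successful once at least 45 nodes reach the `Normal` state.

```go
package main

import "fmt"

// scaleReached mirrors the check in waitNodePoolInitializing: the ratio of
// Normal nodes to the desired node count, expressed as a percentage, must
// reach the configured tolerance, or the desired count must be met exactly.
func scaleReached(normal, desired, tolerance int64) bool {
	currentTolerance := int64((float64(normal) / float64(desired)) * 100)
	return currentTolerance >= tolerance || normal == desired
}

func main() {
	fmt.Println(scaleReached(45, 50, 90)) // true: 90% of the desired nodes are Normal
	fmt.Println(scaleReached(40, 50, 90)) // false: only 80%, so the loop keeps retrying until timeout
}
```
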

‎website/docs/r/kubernetes_node_pool.html.markdown

Lines changed: 71 additions & 0 deletions
@@ -17,6 +17,8 @@ Provide a resource to create an auto scaling group for kubernetes cluster.

~> **NOTE:** In order to ensure the integrity of customer data, if a CVM instance is destroyed by a scale-in, the CBS disks associated with it are kept by default. If you want them destroyed together with the instance, set `delete_with_instance` to `true`.

~> **NOTE:** The two parameters `wait_node_ready` and `scale_tolerance` give finer control over node pool scaling operations. If they are set and the scaling criteria are not met while the resource is being created, the resource is marked as `tainted`.

## Example Usage

```hcl
@@ -154,6 +156,73 @@ resource "tencentcloud_kubernetes_node_pool" "example" {
}
```

```hcl
resource "tencentcloud_kubernetes_node_pool" "example" {
  name                     = "tf-example"
  cluster_id               = tencentcloud_kubernetes_cluster.managed_cluster.id
  max_size                 = 100
  min_size                 = 1
  vpc_id                   = data.tencentcloud_vpc_subnets.vpc.instance_list.0.vpc_id
  subnet_ids               = [data.tencentcloud_vpc_subnets.vpc.instance_list.0.subnet_id]
  retry_policy             = "INCREMENTAL_INTERVALS"
  desired_capacity         = 50
  enable_auto_scale        = false
  wait_node_ready          = true
  scale_tolerance          = 90
  multi_zone_subnet_policy = "EQUALITY"
  node_os                  = "img-6n21msk1"
  delete_keep_instance     = false

  auto_scaling_config {
    instance_type              = var.default_instance_type
    system_disk_type           = "CLOUD_PREMIUM"
    system_disk_size           = "50"
    orderly_security_group_ids = ["sg-bw28gmso"]

    data_disk {
      disk_type            = "CLOUD_PREMIUM"
      disk_size            = 50
      delete_with_instance = true
    }

    internet_charge_type       = "TRAFFIC_POSTPAID_BY_HOUR"
    internet_max_bandwidth_out = 10
    public_ip_assigned         = true
    password                   = "test123#"
    enhanced_security_service  = false
    enhanced_monitor_service   = false
    host_name                  = "12.123.0.0"
    host_name_style            = "ORIGINAL"
  }

  labels = {
    "test1" = "test1",
    "test2" = "test2",
  }

  taints {
    key    = "test_taint"
    value  = "taint_value"
    effect = "PreferNoSchedule"
  }

  taints {
    key    = "test_taint2"
    value  = "taint_value2"
    effect = "PreferNoSchedule"
  }

  node_config {
    docker_graph_path = "/var/lib/docker"
    extra_args = [
      "root-dir=/var/lib/kubelet"
    ]
  }
}
```

## Argument Reference

The following arguments are supported:
@@ -176,6 +245,7 @@ The following arguments are supported:
* `node_os_type` - (Optional, String) The image version of the node. Valid values are `DOCKER_CUSTOMIZE` and `GENERAL`. Default is `GENERAL`. This parameter only affects new nodes, not existing nodes.
* `node_os` - (Optional, String) Operating system of the cluster. Please refer to [TencentCloud Documentation](https://www.tencentcloud.com/document/product/457/46750?lang=en&pg=#list-of-public-images-supported-by-tke) for available values. Default is 'tlinux2.4x86_64'. This parameter only affects new nodes, not existing nodes.
* `retry_policy` - (Optional, String, ForceNew) Available values for retry policies include `IMMEDIATE_RETRY` and `INCREMENTAL_INTERVALS`.
* `scale_tolerance` - (Optional, Int) Minimum percentage of the expected node count (`desired_capacity`) that must scale out successfully for the operation to be considered successful. Unit is percentage; default is `100`. For example, with `desired_capacity = 50` and `scale_tolerance = 90`, the operation succeeds once at least 45 nodes are ready. Can only be set if `wait_node_ready` is `true`.
* `scaling_group_name` - (Optional, String) Name of relative scaling group.
* `scaling_group_project_id` - (Optional, Int) Project ID the scaling group belongs to.
* `scaling_mode` - (Optional, String, ForceNew) Auto scaling mode. Valid values are `CLASSIC_SCALING` (scale by creating/destroying instances) and `WAKE_UP_STOPPED_SCALING` (boot priority for scale-out: stopped instances are started first, and new instances are created only if the started instances are still fewer than expected; scale-in still destroys instances).

@@ -184,6 +254,7 @@ The following arguments are supported:
* `taints` - (Optional, List) Taints of kubernetes node pool created nodes.
* `termination_policies` - (Optional, List: [`String`]) Policy of scaling group termination. Available values: `["OLDEST_INSTANCE"]`, `["NEWEST_INSTANCE"]`.
* `unschedulable` - (Optional, Int, ForceNew) Sets whether the joining node participates in scheduling. Default is `0`, meaning the node participates in scheduling.
* `wait_node_ready` - (Optional, Bool) Whether to wait for all scale-out resources to become ready. Default is `false`. Can only be set if `enable_auto_scale` is `false`.
* `zones` - (Optional, List: [`String`]) List of auto scaling group available zones; required for Basic network.

The `annotations` object supports the following:
