From 8334f4400ae49143422eee5aa7690e5c31e76944 Mon Sep 17 00:00:00 2001
From: SevenEarth <391613297@qq.com>
Date: Thu, 28 Nov 2024 14:45:00 +0800
Subject: [PATCH 1/4] add

---
 .../tke/resource_tc_kubernetes_node_pool.go   |  13 ++
 .../tke/resource_tc_kubernetes_node_pool.md   |  68 ++++++++
 ...ource_tc_kubernetes_node_pool_extension.go | 158 ++++++++++++++++++
 .../docs/r/kubernetes_node_pool.html.markdown |  71 ++++++++
 4 files changed, 310 insertions(+)

diff --git a/tencentcloud/services/tke/resource_tc_kubernetes_node_pool.go b/tencentcloud/services/tke/resource_tc_kubernetes_node_pool.go
index bfb7a74d37..b96b9fbf3d 100644
--- a/tencentcloud/services/tke/resource_tc_kubernetes_node_pool.go
+++ b/tencentcloud/services/tke/resource_tc_kubernetes_node_pool.go
@@ -63,6 +63,19 @@ func ResourceTencentCloudKubernetesNodePool() *schema.Resource {
 				ValidateFunc: tccommon.ValidateIntegerInRange(0, 2000),
 			},
 
+			"wait_node_ready": {
+				Type:        schema.TypeBool,
+				Optional:    true,
+				Description: "Whether to wait for all scaled-out resources to become ready. Default is `false`. Can only be set if `enable_auto_scale` is `false`.",
+			},
+
+			"scale_tolerance": {
+				Type:         schema.TypeInt,
+				Optional:     true,
+				Description:  "The minimum percentage of the expected nodes (`desired_capacity`) that must be successfully scaled out. Unit is percentage. Default is `100`. Can only be set if `wait_node_ready` is `true`.",
+				ValidateFunc: tccommon.ValidateIntegerInRange(0, 100),
+			},
+
 			"enable_auto_scale": {
 				Type:     schema.TypeBool,
 				Optional: true,
diff --git a/tencentcloud/services/tke/resource_tc_kubernetes_node_pool.md b/tencentcloud/services/tke/resource_tc_kubernetes_node_pool.md
index 7c4efd497b..3186d77b3b 100644
--- a/tencentcloud/services/tke/resource_tc_kubernetes_node_pool.md
+++ b/tencentcloud/services/tke/resource_tc_kubernetes_node_pool.md
@@ -6,6 +6,8 @@ Provide a resource to create an auto scaling group for kubernetes cluster.
 
 ~> **NOTE:** In order to ensure the integrity of customer data, if the cvm instance was destroyed due to shrinking, it will keep the cbs associate with cvm by default. If you want to destroy together, please set `delete_with_instance` to `true`.
 
+~> **NOTE:** The `wait_node_ready` and `scale_tolerance` parameters provide better control over node pool scaling operations. If they are set and the scaling criteria are not met when the resource is created, the resource will be marked as `tainted`.
+ Example Usage ```hcl @@ -143,6 +145,72 @@ resource "tencentcloud_kubernetes_node_pool" "example" { } ``` +Set `wait_node_ready` and `scale_tolerance` +```hcl +resource "tencentcloud_kubernetes_node_pool" "example" { + name = "tf-example" + cluster_id = tencentcloud_kubernetes_cluster.managed_cluster.id + max_size = 100 + min_size = 1 + vpc_id = data.tencentcloud_vpc_subnets.vpc.instance_list.0.vpc_id + subnet_ids = [data.tencentcloud_vpc_subnets.vpc.instance_list.0.subnet_id] + retry_policy = "INCREMENTAL_INTERVALS" + desired_capacity = 50 + enable_auto_scale = false + wait_node_ready = true + scale_tolerance = 90 + multi_zone_subnet_policy = "EQUALITY" + node_os = "img-6n21msk1" + delete_keep_instance = false + + auto_scaling_config { + instance_type = var.default_instance_type + system_disk_type = "CLOUD_PREMIUM" + system_disk_size = "50" + orderly_security_group_ids = ["sg-bw28gmso"] + + data_disk { + disk_type = "CLOUD_PREMIUM" + disk_size = 50 + delete_with_instance = true + } + + internet_charge_type = "TRAFFIC_POSTPAID_BY_HOUR" + internet_max_bandwidth_out = 10 + public_ip_assigned = true + password = "test123#" + enhanced_security_service = false + enhanced_monitor_service = false + host_name = "12.123.0.0" + host_name_style = "ORIGINAL" + } + + labels = { + "test1" = "test1", + "test2" = "test2", + } + + taints { + key = "test_taint" + value = "taint_value" + effect = "PreferNoSchedule" + } + + taints { + key = "test_taint2" + value = "taint_value2" + effect = "PreferNoSchedule" + } + + node_config { + docker_graph_path = "/var/lib/docker" + extra_args = [ + "root-dir=/var/lib/kubelet" + ] + } +} +``` + Import tke node pool can be imported, e.g. diff --git a/tencentcloud/services/tke/resource_tc_kubernetes_node_pool_extension.go b/tencentcloud/services/tke/resource_tc_kubernetes_node_pool_extension.go index d967ced3b1..fbd98daf67 100644 --- a/tencentcloud/services/tke/resource_tc_kubernetes_node_pool_extension.go +++ b/tencentcloud/services/tke/resource_tc_kubernetes_node_pool_extension.go @@ -6,6 +6,7 @@ import ( "log" "strings" + "github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common" tchttp "github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common/http" as "github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/as/v20180419" @@ -54,6 +55,11 @@ func resourceTencentCloudKubernetesNodePoolCreatePostFillRequest0(ctx context.Co return fmt.Errorf("need only one auto_scaling_config") } + // check params + if err := checkParams(ctx); err != nil { + return err + } + groupParaStr, err := composeParameterToAsScalingGroupParaSerial(d) if err != nil { return err @@ -153,6 +159,11 @@ func resourceTencentCloudKubernetesNodePoolCreatePostHandleResponse0(ctx context return err } + // wait node scaling + if err = waitNodePoolInitializing(ctx, clusterId, nodePoolId); err != nil { + return err + } + return nil } @@ -556,6 +567,11 @@ func resourceTencentCloudKubernetesNodePoolUpdateOnStart(ctx context.Context) er clusterId := items[0] nodePoolId := items[1] + // check params + if err := checkParams(ctx); err != nil { + return err + } + d.Partial(true) nodePool, _, err := service.DescribeNodePool(ctx, clusterId, nodePoolId) @@ -614,6 +630,11 @@ func resourceTencentCloudKubernetesNodePoolUpdateOnStart(ctx context.Context) er return err } capacityHasChanged = true + + // wait node scaling + if err = waitNodePoolInitializing(ctx, clusterId, nodePoolId); err != nil { + return err + } } // ModifyClusterNodePool @@ -697,6 +718,11 @@ func 
resourceTencentCloudKubernetesNodePoolUpdateOnStart(ctx context.Context) er if err != nil { return err } + + // wait node scaling + if err = waitNodePoolInitializing(ctx, clusterId, nodePoolId); err != nil { + return err + } } return nil @@ -1351,3 +1377,135 @@ func resourceTencentCloudKubernetesNodePoolUpdateTaints(ctx context.Context, clu } return nil } + +func checkParams(ctx context.Context) error { + d := tccommon.ResourceDataFromContext(ctx) + var ( + enableAutoscale bool + waitNodeReady bool + ) + + if v, ok := d.GetOkExists("enable_auto_scale"); ok { + enableAutoscale = v.(bool) + } + + if v, ok := d.GetOkExists("wait_node_ready"); ok { + waitNodeReady = v.(bool) + } + + if enableAutoscale && waitNodeReady { + return fmt.Errorf("`wait_node_ready` only can be set if `enable_auto_scale` is `false`.") + } + + if _, ok := d.GetOkExists("scale_tolerance"); ok { + if !waitNodeReady { + return fmt.Errorf("`scale_tolerance` only can be set if `wait_node_ready` is `true`.") + } + } + + return nil +} + +func waitNodePoolInitializing(ctx context.Context, clusterId, nodePoolId string) (err error) { + d := tccommon.ResourceDataFromContext(ctx) + meta := tccommon.ProviderMetaFromContext(ctx) + + var ( + currentNormal int64 + desiredCapacity int64 + waitNodeReady bool + scaleTolerance int64 = 100 + autoscalingGroupId string + ) + + if v, ok := d.GetOkExists("desired_capacity"); ok { + desiredCapacity = int64(v.(int)) + if desiredCapacity == 0 { + desiredCapacity = 1 + } + } + + if v, ok := d.GetOkExists("wait_node_ready"); ok { + waitNodeReady = v.(bool) + } + + if waitNodeReady { + if v, ok := d.GetOkExists("scale_tolerance"); ok { + scaleTolerance = int64(v.(int)) + } + + logId := tccommon.GetLogId(tccommon.ContextNil) + nodePoolDetailrequest := tke.NewDescribeClusterNodePoolDetailRequest() + nodePoolDetailrequest.ClusterId = common.StringPtr(clusterId) + nodePoolDetailrequest.NodePoolId = common.StringPtr(nodePoolId) + err = resource.Retry(1*tccommon.ReadRetryTimeout, func() *resource.RetryError { + result, e := meta.(tccommon.ProviderMeta).GetAPIV3Conn().UseTkeV20180525Client().DescribeClusterNodePoolDetailWithContext(ctx, nodePoolDetailrequest) + if e != nil { + return tccommon.RetryError(e) + } else { + log.Printf("[DEBUG]%s api[%s] success, request body [%s], response body [%s]\n", logId, nodePoolDetailrequest.GetAction(), nodePoolDetailrequest.ToJsonString(), result.ToJsonString()) + } + + if result == nil || result.Response == nil || result.Response.NodePool == nil || result.Response.NodePool.NodeCountSummary == nil || result.Response.NodePool.NodeCountSummary.AutoscalingAdded == nil { + e = fmt.Errorf("Cluster %s node pool %s not exists", clusterId, nodePoolId) + return resource.NonRetryableError(e) + } + + desiredNodesNum := result.Response.NodePool.DesiredNodesNum + autoscalingAdded := result.Response.NodePool.NodeCountSummary.AutoscalingAdded + total := autoscalingAdded.Total + normal := autoscalingAdded.Normal + if *total != 0 { + if *normal > *desiredNodesNum { + return resource.RetryableError(fmt.Errorf("Node pool is still scaling")) + } + + currentTolerance := int64((float64(*normal) / float64(*desiredNodesNum)) * 100) + if currentTolerance >= scaleTolerance || *desiredNodesNum == *normal { + return nil + } + } + + currentNormal = *normal + autoscalingGroupId = *result.Response.NodePool.AutoscalingGroupId + return resource.RetryableError(fmt.Errorf("Node pool is still scaling")) + }) + + if err != nil { + if currentNormal < 1 { + var errFmt string + asRequest := 
as.NewDescribeAutoScalingActivitiesRequest()
+                asRequest.Filters = []*as.Filter{
+                    {
+                        Name:   common.StringPtr("auto-scaling-group-id"),
+                        Values: common.StringPtrs([]string{autoscalingGroupId}),
+                    },
+                }
+
+                err = resource.Retry(tccommon.ReadRetryTimeout, func() *resource.RetryError {
+                    result, e := meta.(tccommon.ProviderMeta).GetAPIV3Conn().UseAsClient().DescribeAutoScalingActivitiesWithContext(ctx, asRequest)
+                    if e != nil {
+                        return tccommon.RetryError(e)
+                    } else {
+                        log.Printf("[DEBUG]%s api[%s] success, request body [%s], response body [%s]\n", logId, asRequest.GetAction(), asRequest.ToJsonString(), result.ToJsonString())
+                    }
+
+                    if result == nil || result.Response == nil || result.Response.ActivitySet == nil || len(result.Response.ActivitySet) < 1 {
+                        e = fmt.Errorf("Describe auto scaling activities failed")
+                        return resource.NonRetryableError(e)
+                    }
+
+                    res := result.Response.ActivitySet[0]
+                    errFmt = fmt.Sprintf("%s\nDescription: %s\nStatusMessage: %s", *res.StatusMessageSimplified, *res.Description, *res.StatusMessage)
+                    return nil
+                })
+
+                return fmt.Errorf("Node pool scaling failed. Reason: %s\nPlease check your resource inventory, or adjust `desired_capacity`, `scale_tolerance` and `instance_type`, then try again.", errFmt)
+            } else {
+                return fmt.Errorf("Node pool scaling failed. Desired value: %d, actual value: %d, scale tolerance: %d%%\nPlease check your resource inventory, or adjust `desired_capacity`, `scale_tolerance` and `instance_type`, then try again.", desiredCapacity, currentNormal, scaleTolerance)
+            }
+        }
+    }
+
+    return nil
+}
diff --git a/website/docs/r/kubernetes_node_pool.html.markdown b/website/docs/r/kubernetes_node_pool.html.markdown
index b8fe3e0c1b..9b5baf57b2 100644
--- a/website/docs/r/kubernetes_node_pool.html.markdown
+++ b/website/docs/r/kubernetes_node_pool.html.markdown
@@ -17,6 +17,8 @@ Provide a resource to create an auto scaling group for kubernetes cluster.
 
 ~> **NOTE:** In order to ensure the integrity of customer data, if the cvm instance was destroyed due to shrinking, it will keep the cbs associate with cvm by default. If you want to destroy together, please set `delete_with_instance` to `true`.
 
+~> **NOTE:** The `wait_node_ready` and `scale_tolerance` parameters provide better control over node pool scaling operations. If they are set and the scaling criteria are not met when the resource is created, the resource will be marked as `tainted`.
+
 ## Example Usage
 
 ```hcl
@@ -154,6 +156,73 @@ resource "tencentcloud_kubernetes_node_pool" "example" {
 }
 ```
 
+
+
+```hcl
+resource "tencentcloud_kubernetes_node_pool" "example" {
+  name                     = "tf-example"
+  cluster_id               = tencentcloud_kubernetes_cluster.managed_cluster.id
+  max_size                 = 100
+  min_size                 = 1
+  vpc_id                   = data.tencentcloud_vpc_subnets.vpc.instance_list.0.vpc_id
+  subnet_ids               = [data.tencentcloud_vpc_subnets.vpc.instance_list.0.subnet_id]
+  retry_policy             = "INCREMENTAL_INTERVALS"
+  desired_capacity         = 50
+  enable_auto_scale        = false
+  wait_node_ready          = true
+  scale_tolerance          = 90
+  multi_zone_subnet_policy = "EQUALITY"
+  node_os                  = "img-6n21msk1"
+  delete_keep_instance     = false
+
+  auto_scaling_config {
+    instance_type              = var.default_instance_type
+    system_disk_type           = "CLOUD_PREMIUM"
+    system_disk_size           = "50"
+    orderly_security_group_ids = ["sg-bw28gmso"]
+
+    data_disk {
+      disk_type            = "CLOUD_PREMIUM"
+      disk_size            = 50
+      delete_with_instance = true
+    }
+
+    internet_charge_type       = "TRAFFIC_POSTPAID_BY_HOUR"
+    internet_max_bandwidth_out = 10
+    public_ip_assigned         = true
+    password                   = "test123#"
+    enhanced_security_service  = false
+    enhanced_monitor_service   = false
+    host_name                  = "12.123.0.0"
+    host_name_style            = "ORIGINAL"
+  }
+
+  labels = {
+    "test1" = "test1",
+    "test2" = "test2",
+  }
+
+  taints {
+    key    = "test_taint"
+    value  = "taint_value"
+    effect = "PreferNoSchedule"
+  }
+
+  taints {
+    key    = "test_taint2"
+    value  = "taint_value2"
+    effect = "PreferNoSchedule"
+  }
+
+  node_config {
+    docker_graph_path = "/var/lib/docker"
+    extra_args = [
+      "root-dir=/var/lib/kubelet"
+    ]
+  }
+}
+```
+
 ## Argument Reference
 
 The following arguments are supported:
@@ -176,6 +245,7 @@ The following arguments are supported:
 * `node_os_type` - (Optional, String) The image version of the node. Valida values are `DOCKER_CUSTOMIZE` and `GENERAL`. Default is `GENERAL`. This parameter will only affect new nodes, not including the existing nodes.
 * `node_os` - (Optional, String) Operating system of the cluster. Please refer to [TencentCloud Documentation](https://www.tencentcloud.com/document/product/457/46750?lang=en&pg=#list-of-public-images-supported-by-tke) for available values. Default is 'tlinux2.4x86_64'. This parameter will only affect new nodes, not including the existing nodes.
 * `retry_policy` - (Optional, String, ForceNew) Available values for retry policies include `IMMEDIATE_RETRY` and `INCREMENTAL_INTERVALS`.
+* `scale_tolerance` - (Optional, Int) The minimum percentage of the expected nodes (`desired_capacity`) that must be successfully scaled out. Unit is percentage. Default is `100`. Can only be set if `wait_node_ready` is `true`.
 * `scaling_group_name` - (Optional, String) Name of relative scaling group.
 * `scaling_group_project_id` - (Optional, Int) Project ID the scaling group belongs to.
 * `scaling_mode` - (Optional, String, ForceNew) Auto scaling mode. Valid values are `CLASSIC_SCALING`(scaling by create/destroy instances), `WAKE_UP_STOPPED_SCALING`(Boot priority for expansion. When expanding the capacity, the shutdown operation is given priority to the shutdown of the instance. If the number of instances is still lower than the expected number of instances after the startup, the instance will be created, and the method of destroying the instance will still be used for shrinking).
@@ -184,6 +254,7 @@ The following arguments are supported:
 * `taints` - (Optional, List) Taints of kubernetes node pool created nodes.
 * `termination_policies` - (Optional, List: [`String`]) Policy of scaling group termination. Available values: `["OLDEST_INSTANCE"]`, `["NEWEST_INSTANCE"]`.
 * `unschedulable` - (Optional, Int, ForceNew) Sets whether the joining node participates in the schedule. Default is '0'. Participate in scheduling.
+* `wait_node_ready` - (Optional, Bool) Whether to wait for all scaled-out resources to become ready. Default is `false`. Can only be set if `enable_auto_scale` is `false`.
 * `zones` - (Optional, List: [`String`]) List of auto scaling group available zones, for Basic network it is required.
 
 The `annotations` object supports the following:

From d7ddeb333c7919c343e40342172f818f13b23a1f Mon Sep 17 00:00:00 2001
From: SevenEarth <391613297@qq.com>
Date: Thu, 28 Nov 2024 14:46:32 +0800
Subject: [PATCH 2/4] add

---
 .changelog/2979.txt | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 .changelog/2979.txt

diff --git a/.changelog/2979.txt b/.changelog/2979.txt
new file mode 100644
index 0000000000..a0eb141ba3
--- /dev/null
+++ b/.changelog/2979.txt
@@ -0,0 +1,3 @@
+```release-note:enhancement
+resource/tencentcloud_kubernetes_node_pool: add `wait_node_ready`, `scale_tolerance` params
+```

From 93fc1c79280b22620c8373aa201d190a69791d57 Mon Sep 17 00:00:00 2001
From: SevenEarth <391613297@qq.com>
Date: Thu, 28 Nov 2024 15:08:25 +0800
Subject: [PATCH 3/4] add

---
 .../tke/resource_tc_kubernetes_node_pool.md       |  1 +
 .../resource_tc_kubernetes_node_pool_extension.go | 14 +++++++-------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/tencentcloud/services/tke/resource_tc_kubernetes_node_pool.md b/tencentcloud/services/tke/resource_tc_kubernetes_node_pool.md
index 3186d77b3b..345c22b853 100644
--- a/tencentcloud/services/tke/resource_tc_kubernetes_node_pool.md
+++ b/tencentcloud/services/tke/resource_tc_kubernetes_node_pool.md
@@ -146,6 +146,7 @@ resource "tencentcloud_kubernetes_node_pool" "example" {
 ```
 
 Set `wait_node_ready` and `scale_tolerance`
+
 ```hcl
 resource "tencentcloud_kubernetes_node_pool" "example" {
   name                     = "tf-example"
diff --git a/tencentcloud/services/tke/resource_tc_kubernetes_node_pool_extension.go b/tencentcloud/services/tke/resource_tc_kubernetes_node_pool_extension.go
index fbd98daf67..44a152a20b 100644
--- a/tencentcloud/services/tke/resource_tc_kubernetes_node_pool_extension.go
+++ b/tencentcloud/services/tke/resource_tc_kubernetes_node_pool_extension.go
@@ -1418,18 +1418,18 @@ func waitNodePoolInitializing(ctx context.Context, clusterId, nodePoolId string)
         autoscalingGroupId string
     )
 
-    if v, ok := d.GetOkExists("desired_capacity"); ok {
-        desiredCapacity = int64(v.(int))
-        if desiredCapacity == 0 {
-            desiredCapacity = 1
-        }
-    }
-
     if v, ok := d.GetOkExists("wait_node_ready"); ok {
         waitNodeReady = v.(bool)
     }
 
     if waitNodeReady {
+        if v, ok := d.GetOkExists("desired_capacity"); ok {
+            desiredCapacity = int64(v.(int))
+            if desiredCapacity == 0 {
+                desiredCapacity = 1
+            }
+        }
+
         if v, ok := d.GetOkExists("scale_tolerance"); ok {
             scaleTolerance = int64(v.(int))
         }

From 052fef8ab047f67cfe3ecc4257046a2d9027b85e Mon Sep 17 00:00:00 2001
From: SevenEarth <391613297@qq.com>
Date: Thu, 28 Nov 2024 16:23:16 +0800
Subject: [PATCH 4/4] add

---
 .../tke/resource_tc_kubernetes_node_pool_extension.go | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tencentcloud/services/tke/resource_tc_kubernetes_node_pool_extension.go b/tencentcloud/services/tke/resource_tc_kubernetes_node_pool_extension.go
index 44a152a20b..8a13d43d00 100644
--- a/tencentcloud/services/tke/resource_tc_kubernetes_node_pool_extension.go
+++ b/tencentcloud/services/tke/resource_tc_kubernetes_node_pool_extension.go
@@ -1500,7 +1500,9 @@ func waitNodePoolInitializing(ctx context.Context, clusterId, nodePoolId string)
                     return nil
                 })
 
-                return fmt.Errorf("Node pool scaling failed. Reason: %s\nPlease check your resource inventory, or adjust `desired_capacity`, `scale_tolerance` and `instance_type`, then try again.", errFmt)
+                if err != nil {
+                    return fmt.Errorf("Node pool scaling failed. Reason: %s\nPlease check your resource inventory, or adjust `desired_capacity`, `scale_tolerance` and `instance_type`, then try again.", errFmt)
+                }
             } else {
                 return fmt.Errorf("Node pool scaling failed. Desired value: %d, actual value: %d, scale tolerance: %d%%\nPlease check your resource inventory, or adjust `desired_capacity`, `scale_tolerance` and `instance_type`, then try again.", desiredCapacity, currentNormal, scaleTolerance)
             }