$ tanzu cluster list -A
  NAME                NAMESPACE  STATUS         CONTROLPLANE  WORKERS  KUBERNETES
  workload-cluster-A  default    updateStalled  3/3           3/3      v1.28.7+vmware.1
$ kubectl get node
NAME                   STATUS  ROLES          AGE   VERSION
tanzu-control-plane-A  Ready   control-plane  21h   v1.28.7+vmware.1
tanzu-control-plane-B  Ready   control-plane  21h   v1.28.7+vmware.1
tanzu-control-plane-C  Ready   control-plane  21h   v1.28.7+vmware.1
tanzu-worker-node-A    Ready   <none>         167d  v1.27.5+vmware.1
tanzu-worker-node-B    Ready   <none>         167d  v1.27.5+vmware.1
tanzu-worker-node-C    Ready   <none>         167d  v1.27.5+vmware.1
$ kubectl -n capi-system logs deployments/capi-controller-manager | grep $WORKLOAD_CLUSTER_NAME
MMDD: 1 machineset_controller.go:439] "MachineSet is scaling up to 1 replicas by creating 1 machines" controller="machineset" controllerGroup="cluster.x-k8s.io" controllerKind="MachineSet" MachineSet="default/<Machine-set name>" namespace="default" name="<Machine-set name>" reconcileID=########-####-####-####-############ MachineDeployment="default/<MachineDeployment-name>" Cluster="default/tanzu" replicas=1 machineCount=0
MMDD: 1 machineset_preflight.go:140] "Performing \"Scale up\" on hold because MachineSet version (1.27.5+vmware.1) and ControlPlane version (1.28.7+vmware.1) do not conform to kubeadm version skew policy as kubeadm only supports joining with the same major+minor version as the control plane (\"KubeadmVersionSkew\" preflight failed). The operation will continue after the preflight check(s) pass" controller="machineset" controllerGroup="cluster.x-k8s.io" controllerKind="MachineSet" MachineSet="default/<Machine-set name>" namespace="default"
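To see the skew that the preflight check is reporting, you can compare the version requested by the worker pool against the control plane version; a minimal check (the custom-columns layout is illustrative):

# Worker pools: version requested by each MachineDeployment
kubectl get machinedeployments -A -o custom-columns='NAME:.metadata.name,VERSION:.spec.template.spec.version'
# Control plane version
kubectl get kubeadmcontrolplane -A -o custom-columns='NAME:.metadata.name,VERSION:.spec.version'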
$ tanzu cluster list -A
  NAME                NAMESPACE  STATUS         CONTROLPLANE  WORKERS  KUBERNETES
  workload-cluster-A  default    updateStalled  3/3           2/3      v1.28.7+vmware.1
$ kubectl get cluster ${WORKLOAD_CLUSTER_NAME} -o yaml | yq .metadata
...
labels:
  tanzuKubernetesRelease: v1.28.7+vmware.1-tkg.3
  tkg.tanzu.vmware.com/cluster-name: workload-cluster-A
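The Machine objects on the management cluster show the same stalled rollout; one way to list them with their versions (columns illustrative):

kubectl get machines -n default -o custom-columns='NAME:.metadata.name,VERSION:.spec.version,PHASE:.status.phase'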
This workaround is verified only for legacy (plan-based) clusters, not for class-based clusters.
If you encounter this issue with a class-based cluster, please raise a new support case.
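If you are unsure which type your cluster is, a quick check: class-based clusters populate .spec.topology on the Cluster object, while legacy clusters leave it empty. Assuming the default namespace used in the outputs above:

# Empty output suggests a legacy cluster; a class name indicates a class-based cluster.
kubectl get cluster ${WORKLOAD_CLUSTER_NAME} -n default -o jsonpath='{.spec.topology.class}'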
kubectl get md -A
export CNS='default'                     # Workload cluster namespace
export MD='workload-slot35rp25-md-0'
VMT_NAME=$(kubectl get machinedeployments $MD -n $CNS -o jsonpath='{.spec.template.spec.infrastructureRef.name}')
kubectl get vspheremachinetemplates $VMT_NAME -n $CNS -o yaml > vmt_${VMT_NAME}.yaml
cp vmt_${VMT_NAME}.yaml vmt_${VMT_NAME}-new.yaml
vim vmt_${VMT_NAME}-new.yaml

[Modifications]
  metadata.annotations        -> [delete]
  metadata.creationTimestamp  -> [delete]
  metadata.generation         -> [delete]
  metadata.resourceVersion    -> [delete]
  metadata.uid                -> [delete]
  metadata.name               = ${VMT_NAME}-new
  spec.template.spec.template = /Datacenter/vm/photon-5-kube-v1.28.4+vmware.1   # Set target template path
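If you prefer a non-interactive edit, the same modifications can be scripted; a sketch using yq v4, assuming VMT_NAME is exported so yq can read it with env() (the template path is the example value from above):

export VMT_NAME
yq -i '
  del(.metadata.annotations) |
  del(.metadata.creationTimestamp) |
  del(.metadata.generation) |
  del(.metadata.resourceVersion) |
  del(.metadata.uid) |
  .metadata.name = env(VMT_NAME) + "-new" |
  .spec.template.spec.template = "/Datacenter/vm/photon-5-kube-v1.28.4+vmware.1"
' vmt_${VMT_NAME}-new.yaml

The same pattern applies to the KubeadmConfigTemplate and MachineDeployment edits below.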
kubectl apply -f vmt_${VMT_NAME}-new.yaml
Create a new KubeadmConfigTemplate that has the expected config
KCT_NAME=$(kubectl get machinedeployments $MD -n $CNS -o jsonpath='{.spec.template.spec.bootstrap.configRef.name}')
kubectl get kubeadmconfigtemplates $KCT_NAME -n $CNS -o yaml > kct_${KCT_NAME}.yaml
cp kct_${KCT_NAME}.yaml kct_${KCT_NAME}-new.yaml
vim kct_${KCT_NAME}-new.yaml

[Modifications]
  metadata.annotations        -> [delete]
  metadata.creationTimestamp  -> [delete]
  metadata.generation         -> [delete]
  metadata.resourceVersion    -> [delete]
  metadata.uid                -> [delete]
  metadata.name               = ${KCT_NAME}-new
kubectl apply -f kct_${KCT_NAME}-new.yaml
Create a new MachineDeployment
Make sure it references the new VSphereMachineTemplate and KubeadmConfigTemplate created above and keeps the correct ownerReference to the target Cluster.
kubectl get machinedeployments ${MD} -n $CNS -o yaml > md_${MD}.yaml
cp md_${MD}.yaml md_${MD}-new.yaml
vim md_${MD}-new.yaml

[Modifications]
  metadata.annotations        -> [delete]
  metadata.creationTimestamp  -> [delete]
  metadata.generation         -> [delete]
  metadata.resourceVersion    -> [delete]
  metadata.uid                -> [delete]
  metadata.name               = ${MD}-new
  spec.rolloutAfter           -> [delete]   # Only if present
  spec.selector.matchLabels."cluster.x-k8s.io/deployment-name"     = ${MD}-new
  spec.template.metadata.labels."cluster.x-k8s.io/deployment-name" = ${MD}-new
  spec.template.metadata.labels.node-pool = <existing value>-new   # Append "-new" as a suffix
  spec.template.spec.bootstrap.configRef.name = ${KCT_NAME}-new
  spec.template.spec.infrastructureRef.name   = ${VMT_NAME}-new
  spec.template.spec.version  = v1.28.4+vmware.1                   # Set target version
  status                      -> [delete]
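The scripted equivalent of these edits, again a sketch with yq v4; note that label keys containing dots must be quoted in the yq path, and the node-pool edit assumes that label exists on your MachineDeployment:

export MD KCT_NAME VMT_NAME
yq -i '
  del(.metadata.annotations) |
  del(.metadata.creationTimestamp) |
  del(.metadata.generation) |
  del(.metadata.resourceVersion) |
  del(.metadata.uid) |
  del(.spec.rolloutAfter) |
  del(.status) |
  .metadata.name = env(MD) + "-new" |
  .spec.selector.matchLabels."cluster.x-k8s.io/deployment-name" = env(MD) + "-new" |
  .spec.template.metadata.labels."cluster.x-k8s.io/deployment-name" = env(MD) + "-new" |
  .spec.template.metadata.labels."node-pool" += "-new" |
  .spec.template.spec.bootstrap.configRef.name = env(KCT_NAME) + "-new" |
  .spec.template.spec.infrastructureRef.name = env(VMT_NAME) + "-new" |
  .spec.template.spec.version = "v1.28.4+vmware.1"
' md_${MD}-new.yaml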
kubectl apply -f md_${MD}-new.yaml
Observations after MachineDeployment creation:

kubectl get md
tanzu cluster list
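You should now see the old and the new MachineDeployment side by side; expect output along these lines (names illustrative, columns abbreviated and varying by Cluster API version):

$ kubectl get md
NAME                           CLUSTER              ...  PHASE      VERSION
workload-slot35rp25-md-0       workload-slot35rp25  ...  ScalingUp  v1.27.5+vmware.1
workload-slot35rp25-md-0-new   workload-slot35rp25  ...  Running    v1.28.4+vmware.1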
Delete the old problematic MachineDeployment and its VSphereMachineTemplate and KubeadmConfigTemplate
kubectl config use-context <Context of the target cluster>
kubectl drain <Nodes in the old node pool> --ignore-daemonsets --delete-emptydir-data
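If the old pool spans several nodes, one hedged way to enumerate them is by their old kubelet version (verify the resulting list before draining; the version string matches the example above):

for NODE in $(kubectl get nodes -o jsonpath='{range .items[?(@.status.nodeInfo.kubeletVersion=="v1.27.5+vmware.1")]}{.metadata.name}{"\n"}{end}'); do
  kubectl drain "$NODE" --ignore-daemonsets --delete-emptydir-data
done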
kubectl config use-context <mgmt-cluster>
kubectl delete machinedeployments ${MD} -n $CNS
kubectl delete vspheremachinetemplates ${VMT_NAME} -n $CNS
kubectl delete kubeadmconfigtemplates ${KCT_NAME} -n $CNS
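As a final check, confirm that only the new objects remain and the cluster no longer reports updateStalled:

kubectl get md,vspheremachinetemplates,kubeadmconfigtemplates -n $CNS
tanzu cluster list -A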