Symptoms:
...
{
"can_rollback": true,
"can_skip": false,
"component_type": "MP",
"current_version_node_summary": {
"results": [
{
"component_version": "Done",
"node_count": 2,
"type": "MP",
"upgrade_unit_subtype": "ACTION"
},
{
"component_version": "Pending",
"node_count": 30,
"type": "MP",
"upgrade_unit_subtype": "ACTION"
},
{
"component_version": "3.2.2.0.0.20737190",
"node_count": 1,
"type": "MP",
"upgrade_unit_subtype": "RESOURCE"
}
]
},
"details": "",
"node_count_at_target_version": 2,
"percent_complete": 11,
"pre_upgrade_status": {
"end_time": 1686849901485,
"error_count": 0,
"failure_count": 1,
"start_time": 1686849823612,
"status": "COMPLETED",
"warning_count": 1
},
"status": "FAILED",
"target_component_version": "4.1.0.0.0.21332677"
}
],
"overall_upgrade_status": "PAUSED"
}
"errors": [
"Unexpected error while upgrading upgrade unit: Failed to handover upgrade control to another node. Please ensure that the other nodes are functioning, and retry the upgrade."
],
"group": {
"display_name": "Node OS Upgrade",
"id": "MPNodesGroup"
},
2023-06-15T18:10:30.994Z INFO task-executor-0-workitem-MP-########-####-####-####-##########4b NsxTrustManager 6867 SYSTEM [nsx@6876 comp="nsx-manager" level="INFO" subcomp="upgrade-coordinator"] checkServerTrusted: CN=<customer-domain.com>,OU=<internal-org>,O=<org-name>,C=<Country> for authType=ECDHE_RSA failed: PKIX path building failed: java.security.cert.CertPathBuilderException: Unable to find certificate chain.
2023-06-15T18:10:30.994Z WARN task-executor-0-workitem-MP-########-####-####-####-##########4b HandoverUpgradeUtils 6867 SYSTEM [nsx@6876 comp="nsx-manager" level="WARNING" subcomp="upgrade-coordinator"] UC is not responding
org.springframework.web.client.ResourceAccessException: I/O error on GET request for "https://10.252.192.135:443/api/v1/upgrade/plugin/uc-startup-status": PKIX path building failed: java.security.cert.CertPathBuilderException: Unable to find certificate chain.; nested exception is javax.net.ssl.SSLHandshakeException: PKIX path building failed: java.security.cert.CertPathBuilderException: Unable to find certificate chain.
at org.springframework.web.client.RestTemplate.doExecute(RestTemplate.java:785) ~[spring-web-5.3.20.jar:5.3.20]
at org.springframework.web.client.RestTemplate.execute(RestTemplate.java:711) ~[spring-web-5.3.20.jar:5.3.20]
at org.springframework.web.client.RestTemplate.exchange(RestTemplate.java:602) ~[spring-web-5.3.20.jar:5.3.20]
at com.vmware.nsx.management.common.rest.RestRequestImpl.createEntityAndExchange(RestRequestImpl.java:54) ~[librest-util.jar:?]
at com.vmware.nsx.management.common.rest.RestRequestImpl.doGet(RestRequestImpl.java:73) ~[librest-util.jar:?]
at com.vmware.nsx.management.upgrade.rpcframework.UcRestClient.sendGetRequest(UcRestClient.java:102) ~[libuc-core.jar:?]
at com.vmware.nsx.management.upgrade.utils.HandoverUpgradeUtils.invokeHandover(HandoverUpgradeUtils.java:95) ~[libnsx-upgrade-plugins.jar:?]
at com.vmware.nsx.management.upgrade.utils.HandoverUpgradeUtils.performHandover(HandoverUpgradeUtils.java:72) ~[libnsx-upgrade-plugins.jar:?]
at com.vmware.nsx.management.upgrade.plugin.mp.service.MPRollingUpgradeServiceImpl.upgradeMPNode(MPRollingUpgradeServiceImpl.java:744) ~[libnsx-upgrade-plugins.jar:?]
at com.vmware.nsx.management.upgrade.plugin.mp.service.MPRollingUpgradeServiceImpl.upgrade(MPRollingUpgradeServiceImpl.java:246) ~[libnsx-upgrade-plugins.jar:?]
at com.vmware.nsx.management.upgrade.plugin.mp.MpUpgradePlugin.upgrade(MpUpgradePlugin.java:275) ~[libnsx-upgrade-plugins.jar:?]
at com.vmware.nsx.management.upgrade.executionengine.SingleWorkItem.executeWorkItem(SingleWorkItem.java:115) ~[libuc-core.jar:?]
at com.vmware.nsx.management.upgrade.executionengine.SingleWorkItem.run(SingleWorkItem.java:90) ~[libuc-core.jar:?]
at com.vmware.nsx.management.common.executor.TaskExecutorImpl$TaskWrapper$1.run(TaskExecutorImpl.java:238) ~[libmp_common.jar:?]
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[?:1.8.0_342]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_342]
at com.vmware.nsx.management.common.executor.TaskExecutorImpl$TaskWrapper.run(TaskExecutorImpl.java:271) ~[libmp_common.jar:?]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_342]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_342]
at java.lang.Thread.run(Thread.java:750) ~[?:1.8.0_342]
2023-06-15T18:10:30.995Z ERROR task-executor-0-workitem-MP-########-####-####-####-##########4b WorkItem 6867 SYSTEM [nsx@6876 comp="nsx-manager" errorCode="MP30062" level="ERROR" subcomp="upgrade-coordinator"] Error encountered while upgrading upgrade unit <Hostname/FQDN>
com.vmware.nsx.management.upgrade.plugin.mp.exceptions.MPUpgradeException: null
at com.vmware.nsx.management.upgrade.utils.HandoverUpgradeUtils.performHandover(HandoverUpgradeUtils.java:79) ~[libnsx-upgrade-plugins.jar:?]
at com.vmware.nsx.management.upgrade.plugin.mp.service.MPRollingUpgradeServiceImpl.upgradeMPNode(MPRollingUpgradeServiceImpl.java:744) ~[libnsx-upgrade-plugins.jar:?]
at com.vmware.nsx.management.upgrade.plugin.mp.service.MPRollingUpgradeServiceImpl.upgrade(MPRollingUpgradeServiceImpl.java:246) ~[libnsx-upgrade-plugins.jar:?]
at com.vmware.nsx.management.upgrade.plugin.mp.MpUpgradePlugin.upgrade(MpUpgradePlugin.java:275) ~[libnsx-upgrade-plugins.jar:?]
at com.vmware.nsx.management.upgrade.executionengine.SingleWorkItem.executeWorkItem(SingleWorkItem.java:115) ~[libuc-core.jar:?]
at com.vmware.nsx.management.upgrade.executionengine.SingleWorkItem.run(SingleWorkItem.java:90) ~[libuc-core.jar:?]
at com.vmware.nsx.management.common.executor.TaskExecutorImpl$TaskWrapper$1.run(TaskExecutorImpl.java:238) ~[libmp_common.jar:?]
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[?:1.8.0_342]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_342]
at com.vmware.nsx.management.common.executor.TaskExecutorImpl$TaskWrapper.run(TaskExecutorImpl.java:271) ~[libmp_common.jar:?]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_342]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_342]
at java.lang.Thread.run(Thread.java:750) ~[?:1.8.0_342]
2023-06-15T18:10:31.011Z INFO task-executor-0-workitem-MP-########-####-####-####-##########4b ExecutionMonitorServiceImpl 6867 SYSTEM [nsx@6876 comp="nsx-manager" level="INFO" subcomp="upgrade-coordinator"] Execution monitor service invoked to react to failure of node ########-####-####-####-##########4b [Failed to handover upgrade control to another node. Please ensure that the other nodes are functioning, and retry the upgrade.]
Below is an anonymized example of a certificate chain in a format known to cause this problem, followed by the format that avoids it.
Problematic format:
subject=CN=<customer-domain.com>, OU=<internal-org>, O=<org-name>, C=US
issuer=CN=<intermediate-customer-ca>, OU=<internal-org>, O=<org-name>, C=US
-----BEGIN CERTIFICATE-----
MIIGdjCCBV6gAwIBAgITagAH74aCkhVrWqQdzwAAAAfvhjANBgkqhkiG9w0BAQsF
...
-----END CERTIFICATE-----
subject=CN=<intermediate-customer-ca>, OU=<internal-org>, O=<org-name>, C=US
issuer=CN=<customer-ca>, OU=<internal-org>, O=<org-name>, C=US
-----BEGIN CERTIFICATE-----
MIIFEjCCA/qgAwIBAgIKYQpEzAAAAAAAFzANBgkqhkiG9w0BAQsFADCBijELMAkG
...
-----END CERTIFICATE-----
subject=CN=<customer-ca>, OU=<internal-org>, O=<org-name>, C=US
issuer=CN=<customer-ca>, OU=<internal-org>, O=<org-name>, C=US
-----BEGIN CERTIFICATE-----
MIID4jCCAsqgAwIBAgIQc2TTvvZGa5pFuLt/IOfHRDANBgkqhkiG9w0BAQsFADCB
...
-----END CERTIFICATE-----
Problem-free format:
-----BEGIN CERTIFICATE-----
MIIGdjCCBV6gAwIBAgITagAH74aCkhVrWqQdzwAAAAfvhjANBgkqhkiG9w0BAQsF
...
-----END CERTIFICATE-----
-----BEGIN CERTIFICATE-----
MIIFEjCCA/qgAwIBAgIKYQpEzAAAAAAAFzANBgkqhkiG9w0BAQsFADCBijELMAkG
...
-----END CERTIFICATE-----
-----BEGIN CERTIFICATE-----
MIID4jCCAsqgAwIBAgIQc2TTvvZGa5pFuLt/IOfHRDANBgkqhkiG9w0BAQsFADCB
...
-----END CERTIFICATE-----