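The steps on this page collect the following information: node statistics (process/OS, memory, CPU, network, and disk/IO) and BOSH logs from the Kubernetes cluster, TKGI, and Harbor deployments.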
Workaround:
Generate a script to gather node statistics
# Write the node-statistics collector to ./get-node-stats.sh.
# The heredoc delimiter is quoted so nothing below is expanded while the file
# is generated; $(date ...) is evaluated later, when the script runs on a node.
cat << 'EOF' > "$PWD/get-node-stats.sh"
#!/usr/bin/env bash
# Captures OS, memory, CPU, network and disk statistics into *.out files in
# the current directory, then bundles them into a tarball under /var/log.
# The "&>" redirections are bash syntax, so this must run under bash.
set -x

# Process/OS
# ulimit is a shell builtin, so it is run inside a root shell instead of
# being handed to sudo as if it were an executable.
sudo bash -c 'ulimit -a' &> ulimit.out
sudo sysctl -a &> sysctl.out
sudo cat /proc/loadavg &> loadavg.out
sudo cat /proc/sys/kernel/threads-max &> threads-max.out
sudo ps -elf &> pself.out
sudo ps -elfT &> psthreads.out
sudo cat /proc/sys/fs/file-nr &> file-nr.out
sudo cat /proc/sys/fs/file-max &> file-max.out
sudo lsof -n &> lsof-n.out
sudo uname -a &> uname.out
sudo vmstat 1 5 &> vmstat.out
sudo sar -A &> sar-A.out
sudo pidstat -p ALL -T ALL -I -l -r -t -u &> pidstat.out
sudo cat /proc/vmstat &> proc-vmstat.out

# Memory
sudo ps -eo pid,ppid,cmd,vsz,fuser,%mem,%cpu --sort=-%mem | head -n 20 &> psmem.out
sudo cat /proc/meminfo &> mem-info.out
sudo vmstat -m &> vmstat-m.out
sudo cat /proc/slabinfo &> slabinfo.out
sudo slabtop -s c -o &> slabtop.out
sudo free -m &> free-m.out

# Kernel log
sudo dmesg -H &> dmesg-H.out

# CPU
sudo ps -eo pid,ppid,cmd,vsz,fuser,%mem,%cpu --sort=-%cpu | head -n 20 &> pscpu.out
sudo cat /proc/cpuinfo &> cpuinfo.out
sudo cat /proc/softirqs &> softirqs.out
sudo cat /proc/interrupts &> interrupts.out
sudo lscpu &> lscpu.out
sudo uptime &> uptime.out
sudo mpstat 1 5 -P ALL &> mpstat-all.out

# Network
sudo netstat -lantupWe &> netstat-lantupWe.out
sudo netstat -i &> netstat-i.out
sudo netstat -s &> netstat-s.out
sudo ss -noemitaup &> ss-noemitaup.out
sudo cat /proc/net/dev &> proc-net-dev.out
sudo cat /proc/net/sockstat &> sockstat.out
sudo cat /proc/net/sockstat6 &> sockstat6.out
sudo ip addr &> ip-addr.out
sudo route -n &> route-n.out
sudo ip rule list &> ip-rule-list.out
sudo ip route show table all &> ip-show-table.out
sudo iptables -L -v -n --line-numbers &> iptables-all.out
sudo conntrack -L &> conntrack-L.out
sudo conntrack -S &> conntrack-S.out
sudo arp -an &> arp-an.out

# Disk/IO
sudo df -ih --total &> df-ih.out
sudo df -ah --total &> df-ah.out
sudo iostat 1 5 &> iostat.out

# Bundle Artifacts
# The archive is written to /var/log with a timestamped name.
sudo tar czvf /var/log/node-stats-$(date +%F-%H-%M-%Z).tar.gz *.out
EOF
Collect Kubernetes Cluster Logs Using Bosh CLI
# Setup Bosh Deployment ID
# NOTE: the value below is an example; substitute the service-instance
# deployment name of the cluster being inspected (listed by `bosh deployments`).
export BOSH_LOG_DEPLOYMENT="service-instance_8e4c492e-daae-4e72-94fb-1f6430f7f4a7"

# Cluster control plane (master) and worker nodes log collection.
# For each instance group: clear any stale copy, upload the script, install it
# under /var/log owned by vcap, then run it with bash as root.
for instance_group in master worker; do
  # -f keeps the first run quiet when no previous copy exists.
  bosh ssh -d "${BOSH_LOG_DEPLOYMENT}" "${instance_group}" -c "sudo rm -f /tmp/get-node-stats.sh"
  bosh scp -d "${BOSH_LOG_DEPLOYMENT}" "$PWD/get-node-stats.sh" "${instance_group}:/tmp/get-node-stats.sh"
  bosh ssh -d "${BOSH_LOG_DEPLOYMENT}" "${instance_group}" -c "sudo install -C -m 775 -o vcap -g vcap /tmp/get-node-stats.sh /var/log/get-node-stats.sh"
  bosh ssh -d "${BOSH_LOG_DEPLOYMENT}" "${instance_group}" -c "sudo bash /var/log/get-node-stats.sh"
done

# Setup for /var/log/ collection. This will include all relevant OS logs, sar data and customer application pod logs
# -fn replaces a link left behind by an interrupted earlier run instead of
# creating a second link nested inside /var/log.
for instance_group in master worker; do
  bosh ssh -d "${BOSH_LOG_DEPLOYMENT}" "${instance_group}" -c "sudo ln -sfn /var/log /var/vcap/sys/log/custom-varlog"
done

bosh logs -d "${BOSH_LOG_DEPLOYMENT}"

# Important Step - Remove symlink for cleanup and executing the script next time
for instance_group in master worker; do
  bosh ssh -d "${BOSH_LOG_DEPLOYMENT}" "${instance_group}" -c "sudo rm -f /var/vcap/sys/log/custom-varlog"
done
Collect TKGI Deployment Logs Using Bosh
# Setup Bosh Deployment ID for the TKGI control plane.
# NOTE: the value below is an example; substitute the
# pivotal-container-service deployment name listed by `bosh deployments`.
export BOSH_LOG_DEPLOYMENT="pivotal-container-service-a3eb6951085e47e40419"

# TKGI API (pivotal-container-service) and database (pks-db) log collection.
# For each instance group: clear any stale copy, upload the script, install it
# under /var/log owned by vcap, then run it with bash as root.
for instance_group in pivotal-container-service pks-db; do
  # -f keeps the first run quiet when no previous copy exists.
  bosh ssh -d "${BOSH_LOG_DEPLOYMENT}" "${instance_group}" -c "sudo rm -f /tmp/get-node-stats.sh"
  bosh scp -d "${BOSH_LOG_DEPLOYMENT}" "$PWD/get-node-stats.sh" "${instance_group}:/tmp/get-node-stats.sh"
  bosh ssh -d "${BOSH_LOG_DEPLOYMENT}" "${instance_group}" -c "sudo install -C -m 775 -o vcap -g vcap /tmp/get-node-stats.sh /var/log/get-node-stats.sh"
  bosh ssh -d "${BOSH_LOG_DEPLOYMENT}" "${instance_group}" -c "sudo bash /var/log/get-node-stats.sh"
done

# Expose /var/log under /var/vcap/sys/log before running `bosh logs`.
# -fn replaces a link left behind by an interrupted earlier run instead of
# creating a second link nested inside /var/log.
for instance_group in pivotal-container-service pks-db; do
  bosh ssh -d "${BOSH_LOG_DEPLOYMENT}" "${instance_group}" -c "sudo ln -sfn /var/log /var/vcap/sys/log/custom-varlog"
done

bosh logs -d "${BOSH_LOG_DEPLOYMENT}"

# Important Step - Remove symlink for cleanup and executing the script next time
for instance_group in pivotal-container-service pks-db; do
  bosh ssh -d "${BOSH_LOG_DEPLOYMENT}" "${instance_group}" -c "sudo rm -f /var/vcap/sys/log/custom-varlog"
done
Collect Harbor Deployment Logs Using Bosh
# Setup Bosh Deployment ID for Harbor.
# NOTE: the value below is an example; substitute the
# harbor-container-registry deployment name listed by `bosh deployments`.
export BOSH_LOG_DEPLOYMENT="harbor-container-registry-d4601cf8b13de303f734"

# Harbor (harbor-app) log collection: clear any stale copy, upload the script,
# install it under /var/log owned by vcap, then run it with bash as root.
# -f keeps the first run quiet when no previous copy exists.
bosh ssh -d "${BOSH_LOG_DEPLOYMENT}" harbor-app -c "sudo rm -f /tmp/get-node-stats.sh"
bosh scp -d "${BOSH_LOG_DEPLOYMENT}" "$PWD/get-node-stats.sh" harbor-app:/tmp/get-node-stats.sh
bosh ssh -d "${BOSH_LOG_DEPLOYMENT}" harbor-app -c "sudo install -C -m 775 -o vcap -g vcap /tmp/get-node-stats.sh /var/log/get-node-stats.sh"
bosh ssh -d "${BOSH_LOG_DEPLOYMENT}" harbor-app -c "sudo bash /var/log/get-node-stats.sh"

# Expose /var/log under /var/vcap/sys/log before running `bosh logs`.
# -fn replaces a link left behind by an interrupted earlier run instead of
# creating a second link nested inside /var/log.
bosh ssh -d "${BOSH_LOG_DEPLOYMENT}" harbor-app -c "sudo ln -sfn /var/log /var/vcap/sys/log/custom-varlog"

bosh logs -d "${BOSH_LOG_DEPLOYMENT}"

# Important Step - Remove symlink for cleanup and executing the script next time
bosh ssh -d "${BOSH_LOG_DEPLOYMENT}" harbor-app -c "sudo rm -f /var/vcap/sys/log/custom-varlog"