The steps in this KB can be used to manually collect node statistics and logs.
Workaround:
Generate script to gather node statistics
# Write get-node-stats.sh into the current directory. The generated script is
# meant to be streamed to each node (ssh ... 'bash -s' < get-node-stats.sh);
# it dumps one *.out file per diagnostic command into the remote working
# directory and bundles them into /var/log/node-stats-<timestamp>.tar.gz.
#
# The heredoc delimiter is quoted so that nothing in the body ($(date ...),
# %mem, ...) is expanded while the script is being generated.
cat << 'EOF' > "$PWD/get-node-stats.sh"
#!/usr/bin/env bash
set -x

# Process/OS
# ulimit is a shell builtin, so sudo cannot execute it directly; run it
# inside a root shell instead.
sudo bash -c 'ulimit -a' &> ulimit.out
sudo sysctl -a &> sysctl.out
sudo cat /proc/loadavg &> loadavg.out
sudo cat /proc/sys/kernel/threads-max &> threads-max.out
sudo ps -elf &> pself.out
sudo ps -elfT &> psthreads.out
sudo cat /proc/sys/fs/file-nr &> file-nr.out
sudo cat /proc/sys/fs/file-max &> file-max.out
sudo lsof -n &> lsof-n.out
sudo uname -a &> uname.out
sudo vmstat 1 5 &> vmstat.out
sudo sar -A &> sar-A.out
sudo pidstat -p ALL -T ALL -I -l -r -t -u &> pidstat.out
sudo cat /proc/vmstat &> proc-vmstat.out

# Memory
sudo ps -eo pid,ppid,cmd,vsz,fuser,%mem,%cpu --sort=-%mem | head -n 20 &> psmem.out
sudo cat /proc/meminfo &> mem-info.out
sudo vmstat -m &> vmstat-m.out
sudo cat /proc/slabinfo &> slabinfo.out
sudo slabtop -s c -o &> slabtop.out
sudo free -m &> free-m.out

# CPU
sudo ps -eo pid,ppid,cmd,vsz,fuser,%mem,%cpu --sort=-%cpu | head -n 20 &> pscpu.out
sudo cat /proc/cpuinfo &> cpuinfo.out
sudo cat /proc/softirqs &> softirqs.out
sudo cat /proc/interrupts &> interrupts.out
sudo lscpu &> lscpu.out
sudo uptime &> uptime.out
sudo mpstat 1 5 -P ALL &> mpstat-all.out

# Network
sudo netstat -lantupWe &> netstat-lantupWe.out
sudo netstat -i &> netstat-i.out
sudo netstat -s &> netstat-s.out
sudo ss -noemitaup &> ss-noemitaup.out
sudo cat /proc/net/dev &> proc-net-dev.out
sudo cat /proc/net/sockstat &> sockstat.out
sudo cat /proc/net/sockstat6 &> sockstat6.out
sudo ip addr &> ip-addr.out
sudo route -n &> route-n.out
sudo ip rule list &> ip-rule-list.out
sudo ip route show table all &> ip-show-table.out
sudo iptables -L -v -n --line-numbers &> iptables-all.out
sudo conntrack -L &> conntrack-L.out
sudo conntrack -S &> conntrack-S.out
sudo arp -an &> arp-an.out

# Disk/IO
sudo df -ih --total &> df-ih.out
sudo df -ah --total &> df-ah.out
sudo iostat -cdxh 1 10 &> iostat.out

# Kubernetes and containerd
sudo journalctl -xeu containerd &> containerd.out
sudo journalctl -xeu kubelet &> kubelet.out
sudo crictl info &> crictlinfo.out

# Bundle Artifacts
sudo tar czvf /var/log/node-stats-$(date +%F-%H-%M-%Z).tar.gz *.out
EOF
Generate script to gather node logs
# Write get-node-logs.sh into the current directory. The generated script
# takes one argument (the node name or address, used only to name the
# archive), tars /var/log into /home/capv/<node>-varlog.tar.gz and hands
# ownership to capv so the archive can be copied off the node without root.
#
# The heredoc delimiter is quoted so $1 and friends are written literally
# instead of being expanded while the script is being generated.
cat << 'EOF' > "$PWD/get-node-logs.sh"
#!/usr/bin/env bash
set -x

# Abort early rather than producing an archive named "-varlog.tar.gz".
node=${1:?node name or address required as first argument}
archive="/home/capv/${node}-varlog.tar.gz"

# Archived (rotated) journal files contain '@' in their names; skip them.
sudo tar czvf "$archive" --exclude='/var/log/journal/*/*@*' /var/log/
sudo chown capv:users "$archive"
EOF
Kick off log collection
# Drive collection across every node: run both generated scripts on each
# node over ssh, copy the /var/log archive back to $HOME, then remove the
# artifacts left on the node.
chmod +x get-node-logs.sh get-node-stats.sh

# Save the ExternalIP of every node; jsonpath prints them whitespace-separated.
kubectl get nodes \
  -o jsonpath='{.items[*].status.addresses[?(@.type=="ExternalIP")].address}' \
  > "$HOME/nodes"

# Update the key path according to your environment
export SSH_KEY_PATH="$HOME/.ssh/id_rsa"

# Options shared by every ssh/scp call. Host-key checking is disabled and
# nothing is recorded in known_hosts.
ssh_opts=(
  -i "$SSH_KEY_PATH"
  -o "UserKnownHostsFile=/dev/null"
  -o "StrictHostKeyChecking=no"
)

# Read the whole file (-d '') and split it on whitespace into an array.
# read returns non-zero at end-of-file, hence the "|| true".
nodes=()
read -r -d '' -a nodes < "$HOME/nodes" || true

if (( ${#nodes[@]} == 0 )); then
  echo "No ExternalIP addresses found in $HOME/nodes; nothing to collect" >&2
fi

for i in "${nodes[@]}"; do
  echo "Collecting data on node $i"
  ssh "${ssh_opts[@]}" -q "capv@$i" 'bash -s' < get-node-stats.sh

  echo "Collecting logs on node $i"
  # The node address becomes $1 of the script that bash reads from stdin.
  ssh "${ssh_opts[@]}" -q "capv@$i" "bash -s -- $i" < get-node-logs.sh

  echo "Transferring files from node $i to local path $HOME"
  scp "${ssh_opts[@]}" "capv@$i:/home/capv/$i-varlog.tar.gz" "$HOME/."

  echo "Removing collected artifacts from node $i"
  # The command is one quoted string so the globs are expanded by the remote
  # shell, never by the local one.
  ssh "${ssh_opts[@]}" -q "capv@$i" \
    "sudo rm -rf /var/log/node-stats-*.tar.gz /home/capv/$i-varlog.tar.gz /home/capv/*.out"
done