When the SGW (Sensor Gateway) has been running for a long time, the /var/log/audit/audit.log file grows to more than 50GB and fills up the disk space of the root filesystem. As a result, the SGW docker container either turns unhealthy or is not created during a reboot, and the following error message is observed when checking the sgw.service status:
root@photon-machine [ /home/admin ]# docker ps
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
root@photon-machine [ /home/admin ]# systemctl status sgw.service
* sgw.service - Sensor Gateway service
Loaded: loaded (/etc/systemd/system/sgw.service; enabled; vendor preset: enabled)
Active: failed (Result: exit-code) since Tue 2024-11-05 20:12:38 UTC; 10h ago
Process: 161131 ExecStart=/usr/bin/python3 /opt/vmware/sgw/etc/sgw.py (code=exited, status=1/FAILURE)
Main PID: 161131 (code=exited, status=1/FAILURE)
Nov 05 20:12:38 photon-machine python3[161131]: File "/opt/vmware/sgw/etc/sgw.py", line 539, in <module>
Nov 05 20:12:38 photon-machine python3[161131]: main()
Nov 05 20:12:38 photon-machine python3[161131]: File "/opt/vmware/sgw/etc/sgw.py", line 533, in main
Nov 05 20:12:38 photon-machine python3[161131]: write_welcometext()
Nov 05 20:12:38 photon-machine python3[161131]: File "/opt/vmware/sgw/etc/sgw.py", line 426, in write_welcometext
Nov 05 20:12:38 photon-machine python3[161131]: with open(welcometext_file, "w") as wc_file:
Nov 05 20:12:38 photon-machine python3[161131]: OSError: [Errno 28] No space left on device
Nov 05 20:12:38 photon-machine systemd[1]: sgw.service: Main process exited, code=exited, status=1/FAILURE
Nov 05 20:12:38 photon-machine systemd[1]: sgw.service: Failed with result 'exit-code'.
Nov 05 20:12:38 photon-machine systemd[1]: Failed to start Sensor Gateway service.
Disk space
root@photon-machine [ /home/admin ]# df -h
Filesystem Size Used Avail Use% Mounted on
devtmpfs 4.0M 0 4.0M 0% /dev
tmpfs 3.9G 0 3.9G 0% /dev/shm
tmpfs 1.6G 49M 1.6G 4% /run
tmpfs 4.0M 0 4.0M 0% /sys/fs/cgroup
/dev/sda6 67G 67G 0 100% /
/dev/sda5 6.5M 14K 5.9M 1% /var/vmware/config
/dev/sda3 488M 46M 407M 11% /boot
/dev/sda2 10M 2.0M 8.1M 20% /boot/efi
tmpfs 3.0G 18M 3.0G 1% /tmp
/dev/mapper/logs_vg-sgw 9.8G 24K 9.3G 1% /var/log/sgw
/dev/mapper/data_vg-bundle 9.8G 32K 9.3G 1% /var/vmware/bundle
tmpfs 795M 0 795M 0% /run/user/1000
SGW 1.2+
The auditd configuration file does not have log rotation configured for the audit.log file: max_log_file_action is set to IGNORE instead of ROTATE.
root@photon-machine [ /home/admin ]# cat /etc/audit/auditd.conf
#
# This file controls the configuration of the audit daemon
#
local_events = yes
write_logs = yes
log_file = /var/log/audit/audit.log
log_group = root
log_format = RAW
flush = INCREMENTAL_ASYNC
freq = 50
max_log_file = 8
num_logs = 5
priority_boost = 4
disp_qos = lossy
dispatcher = /sbin/audispd
name_format = NONE
##name = mydomain
max_log_file_action = IGNORE
space_left = 25%
space_left_action = SYSLOG
verify_email = yes
action_mail_acct = root
admin_space_left = 50
admin_space_left_action = SYSLOG
disk_full_action = SYSLOG
disk_error_action = SYSLOG
use_libwrap = yes
##tcp_listen_port = 60
tcp_listen_queue = 5
tcp_max_per_addr = 1
##tcp_client_ports = 1024-65535
tcp_client_max_idle = 0
enable_krb5 = no
krb5_principal = auditd
##krb5_key_file = /etc/audit/audit.key
distribute_network = no
This issue has been reported internally, and a permanent fix will be included in a future release.
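Workaround to fix the immediate issue: switch to the root user, change to the /tmp directory, and create a script named configure_logrotate_auditd.sh with the content shown below.
#su
#cd /tmp/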
#!/bin/bash
#
# Script to enable audit log rotation by modifying auditd.conf.
#
# Sets max_log_file to 1024 and max_log_file_action to ROTATE in
# /etc/audit/auditd.conf, then sends SIGHUP to a running auditd so that it
# re-reads the configuration without fully stopping or restarting the service.
#
# Usage: run as root, with no arguments.

# Abort on unhandled errors, unset variables, and failed pipeline stages.
set -euo pipefail

# Path of the auditd configuration file to update.
AUDITD_CONF="/etc/audit/auditd.conf"

#######################################
# Print a message to stderr and exit with status 1.
# Arguments: $* - message text
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Set "<keyword> = <value>" in the auditd configuration file.
# An existing line for the keyword is rewritten regardless of the spacing
# around "="; when the keyword is absent, the setting is appended so the
# change is never silently skipped.
# Globals:   AUDITD_CONF (read; the file it names is modified)
# Arguments: $1 - configuration keyword
#            $2 - new value
# Returns:   non-zero if the file could not be updated
#######################################
set_auditd_option() {
  local key=$1
  local value=$2

  # The keyword must be followed by optional whitespace and "=", so
  # "max_log_file" does not match the "max_log_file_action" line.
  if grep -Eq "^[[:space:]]*${key}[[:space:]]*=" "$AUDITD_CONF"; then
    sed -i -E "s/^[[:space:]]*${key}[[:space:]]*=.*/${key} = ${value}/" \
      "$AUDITD_CONF"
  else
    # Make sure the appended setting starts on its own line even when the
    # file does not end with a newline.
    if [[ -s "$AUDITD_CONF" && -n "$(tail -c 1 -- "$AUDITD_CONF")" ]]; then
      printf '\n' >>"$AUDITD_CONF"
    fi
    printf '%s = %s\n' "$key" "$value" >>"$AUDITD_CONF"
  fi
}

#######################################
# Ask a running auditd to re-read its configuration via SIGHUP.
# Outputs:   a notice on stdout when auditd is not running
# Returns:   exits via die if the signal cannot be delivered
#######################################
reload_auditd() {
  local pid_list
  local -a pids=()

  # pidof exits non-zero when no process is found; that is not an error here.
  pid_list=$(pidof auditd || true)

  if [[ -n "$pid_list" ]]; then
    # pidof may print several space-separated PIDs; signal each one.
    read -r -a pids <<<"$pid_list"
    kill -HUP "${pids[@]}" || die "failed to send SIGHUP to auditd"
  else
    echo "auditd is not running."
  fi
}

main() {
  [[ -f "$AUDITD_CONF" ]] || die "configuration file not found: $AUDITD_CONF"

  # Modify max_log_file to 1024
  set_auditd_option "max_log_file" "1024" \
    || die "failed to update max_log_file in $AUDITD_CONF"

  # Modify max_log_file_action to ROTATE
  set_auditd_option "max_log_file_action" "ROTATE" \
    || die "failed to update max_log_file_action in $AUDITD_CONF"

  reload_auditd
}

main "$@"
root@photon-machine [ /tmp ]# chmod 755 configure_logrotate_auditd.sh
root@photon-machine [ /tmp ]# ./configure_logrotate_auditd.sh
root@photon-machine [ /tmp ]# cat /etc/audit/auditd.conf
#
# This file controls the configuration of the audit daemon
#
local_events = yes
write_logs = yes
log_file = /var/log/audit/audit.log
log_group = root
log_format = RAW
flush = INCREMENTAL_ASYNC
freq = 50
max_log_file = 1024
num_logs = 5
priority_boost = 4
disp_qos = lossy
dispatcher = /sbin/audispd
name_format = NONE
##name = mydomain
max_log_file_action = ROTATE
space_left = 25%
space_left_action = SYSLOG
verify_email = yes
action_mail_acct = root
admin_space_left = 50
admin_space_left_action = SYSLOG
disk_full_action = SYSLOG
disk_error_action = SYSLOG
use_libwrap = yes
##tcp_listen_port = 60
tcp_listen_queue = 5
tcp_max_per_addr = 1
##tcp_client_ports = 1024-65535
tcp_client_max_idle = 0
enable_krb5 = no
krb5_principal = auditd
##krb5_key_file = /etc/audit/audit.key
distribute_network = no