Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(monitor): provide valid exitcode #4155

Open
wants to merge 4 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
304 changes: 171 additions & 133 deletions lgsm/functions/command_monitor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,58 +11,66 @@ commandaction="Monitoring"
functionselfname="$(basename "$(readlink -f "${BASH_SOURCE[0]}")")"
fn_firstcommand_set

# Restarts the game server on behalf of the monitor and sends an alert.
# Globals:
#   lockdir, selfname (read)  - locate the "${selfname}.lock" lockfile
#   alert (written)           - alert type consumed by alert.sh
# Arguments:
#   $1 - alert type to send (the visible callers pass "restart" or "restartquery"); required
# Behaviour:
#   With a lockfile present: sends the alert, stops and starts the server inside
#   a subshell, then calls fn_firstcommand_reset.
#   Without a lockfile: only prints a warning and does not restart.
fn__restart_server() {
	if [ -f "${lockdir}/${selfname}.lock" ]; then
		# Fixed: a stray closing brace used to turn the value into e.g. "restart}".
		alert="${1:?alert type is required}"
		alert.sh
		# Subshell so that exitbypass=1 (and anything else stop/start assign)
		# does not leak into the monitor's own shell.
		(
			fn_print_info_nl "Restarting the server and skip immediate exit"
			exitbypass=1
			command_stop.sh
			command_start.sh
			echo "" # start doesn't always print newline
		)
		fn_firstcommand_reset
	else
		fn_print_warn_nl "Skipping server restart because lockfile is missing, probably server is restarted during monitor execution."
	fi
}

fn_monitor_check_lockfile() {
# Monitor does not run it lockfile is not found.
fn_print_dots "Checking lockfile"

# Monitor does not run if lockfile is not found.
if [ ! -f "${lockdir}/${selfname}.lock" ]; then
fn_print_dots "Checking lockfile: "
fn_print_checking_eol
fn_script_log_info "Checking lockfile: CHECKING"
fn_print_error "Checking lockfile: No lockfile found: "
fn_print_error_eol_nl
fn_script_log_error "Checking lockfile: No lockfile found: ERROR"
echo -e "* Start ${selfname} to run monitor."
fn_print_fail_nl "Checking lockfile: No lockfile found"
exitcode="3"
core_exit.sh
fi

# Fix if lockfile is not unix time or contains letters
if [ -f "${lockdir}/${selfname}.lock" ] && [[ "$(head -n 1 "${lockdir}/${selfname}.lock")" =~ [A-Za-z] ]]; then
elif [[ "$(head -n 1 "${lockdir}/${selfname}.lock")" =~ [A-Za-z] ]]; then
fn_print_warn_nl "Checking lockfile: fixing illegal lockfile"
date '+%s' > "${lockdir}/${selfname}.lock"
echo "${version}" >> "${lockdir}/${selfname}.lock"
echo "${port}" >> "${lockdir}/${selfname}.lock"
else
fn_print_ok_nl "Checking lockfile"
fi
}

fn_monitor_check_update() {
fn_print_dots "Checking active updates"

# Monitor will check if update is already running.
if [ "$(pgrep "${selfname} update" | wc -l)" != "0" ]; then
fn_print_dots "Checking active updates: "
fn_print_checking_eol
fn_script_log_info "Checking active updates: CHECKING"
fn_print_error_nl "Checking active updates: SteamCMD is currently checking for updates: "
fn_print_error_eol
fn_script_log_error "Checking active updates: SteamCMD is currently checking for updates: ERROR"
fn_print_fail_nl "SteamCMD is currently checking for updates"
exitcode="2"
core_exit.sh
else
fn_print_ok_nl "Checking active updates"
fi
}

fn_monitor_check_session() {
fn_print_dots "Checking session: "
fn_print_checking_eol
fn_script_log_info "Checking session: CHECKING"
fn_monitor_is_server_running() {
fn_print_dots "Checking session"

# uses status var from check_status.sh
if [ "${status}" != "0" ]; then
fn_print_ok "Checking session: "
fn_print_ok_eol_nl
fn_script_log_pass "Checking session: OK"
fn_print_ok_nl "Checking session"
return 0
else
fn_print_error "Checking session: "
fn_print_fail_eol_nl
fn_script_log_fatal "Checking session: FAIL"
alert="restart"
alert.sh
fn_script_log_info "Checking session: Monitor is restarting ${selfname}"
command_restart.sh
core_exit.sh
fn_print_error_nl "Checking session"
return 1
fi
}

Expand All @@ -81,6 +89,7 @@ fn_monitor_check_queryport() {
fi
core_exit.sh
fi
return 0
}

fn_query_gsquery() {
Expand All @@ -92,134 +101,150 @@ fn_query_gsquery() {
}

fn_query_tcp() {
bash -c 'exec 3<> /dev/tcp/'${queryip}'/'${queryport}'' > /dev/null 2>&1
bash -c "exec 3<> '/dev/tcp/${queryip}/${queryport}'" > /dev/null 2>&1
querystatus="$?"
}

fn_monitor_query() {
local fail_after="60" # seconds
local time_per_attempt="3"
local max_attempts="5"
local wait_between_attempts="$(( (fail_after-max_attempts*time_per_attempt) / (max_attempts-1) ))"

# Will loop and query up to 5 times every 15 seconds.
# Query will wait up to 60 seconds to confirm server is down as server can become non-responsive during map changes.
totalseconds=0
for queryattempt in {1..5}; do
for queryattempt in $(seq 1 "${max_attempts}" ); do

for queryip in "${queryips[@]}"; do
fn_print_dots "Querying port: ${querymethod}: ${queryip}:${queryport} : ${totalseconds}/${queryattempt}: "
fn_print_querying_eol
fn_script_log_info "Querying port: ${querymethod}: ${queryip}:${queryport} : ${queryattempt} : QUERYING"
# querydelay
if [ "$(head -n 1 "${lockdir}/${selfname}.lock")" -gt "$(date "+%s" -d "${querydelay} mins ago")" ]; then
fn_print_ok "Querying port: ${querymethod}: ${ip}:${queryport} : ${totalseconds}/${queryattempt}: "
fn_print_delay_eol_nl
fn_script_log_info "Querying port: ${querymethod}: ${ip}:${queryport} : ${queryattempt} : DELAY"
fn_script_log_info "Query bypassed: ${gameservername} started less than ${querydelay} minutes ago"
fn_script_log_info "Server started: $(date -d @$(head -n 1 "${lockdir}/${selfname}.lock"))"
fn_script_log_info "Current time: $(date)"
monitorpass=1
core_exit.sh
local log_msg="Starting to query in mode \"${querymethod}\" to target \"${queryip}:${queryport}\" attempt ${queryattempt} / ${max_attempts}"
fn_print_dots "${log_msg}"

# will use query method selected in fn_monitor_loop
# gamedig
elif [ "${querymethod}" == "gamedig" ]; then
querystatus="100"
if [ "${querymethod}" == "gamedig" ]; then
query_gamedig.sh
# gsquery
elif [ "${querymethod}" == "gsquery" ]; then

elif [ "${querymethod}" == "gsquery" ]; then
fn_query_gsquery
#tcp query
elif [ "${querymethod}" == "tcp" ]; then

elif [ "${querymethod}" == "tcp" ]; then
fn_query_tcp

else
fn_print_fail_nl "${log_msg} reason: unhandled query method \"${querymethod}\""
fi

# if serverquery is fine
if [ "${querystatus}" == "0" ]; then
# Server query OK.
fn_print_ok "Querying port: ${querymethod}: ${queryip}:${queryport} : ${totalseconds}/${queryattempt}: "
fn_print_ok_eol_nl
fn_script_log_pass "Querying port: ${querymethod}: ${queryip}:${queryport} : ${queryattempt} : OK"
monitorpass=1
if [ "${querystatus}" == "0" ]; then
# Add query data to log.
if [ "${gdname}" ]; then
fn_script_log_info "Server name: ${gdname}"
fi
if [ "${gdplayers}" ]; then
fn_script_log_info "Players: ${gdplayers}/${gdmaxplayers}"
fi
if [ "${gdbots}" ]; then
fn_script_log_info "Bots: ${gdbots}"
fi
if [ "${gdmap}" ]; then
fn_script_log_info "Map: ${gdmap}"
fi
if [ "${gdgamemode}" ]; then
fn_script_log_info "Game Mode: ${gdgamemode}"
fi

# send LinuxGSM stats if monitor is OK.
if [ "${stats}" == "on" ] || [ "${stats}" == "y" ]; then
info_stats.sh
fi
fn_print_ok_nl "${log_msg}"

# Add query data to log.
if [ "${gdname}" ]; then
fn_script_log_info "Server name: ${gdname}"
fi
core_exit.sh
else
# Server query FAIL.
fn_print_fail "Querying port: ${querymethod}: ${queryip}:${queryport} : ${totalseconds}/${queryattempt}: "
fn_print_fail_eol
fn_script_log_warn "Querying port: ${querymethod}: ${queryip}:${queryport} : ${queryattempt} : FAIL"
# Monitor will try gamedig (if supported) for first 30s then gsquery before restarting.
# gsquery will fail if longer than 60s
if [ "${totalseconds}" -ge "59" ]; then
# Monitor will FAIL if over 60s and trigger gane server reboot.
fn_print_fail "Querying port: ${querymethod}: ${queryip}:${queryport} : ${totalseconds}/${queryattempt}: "
fn_print_fail_eol_nl
fn_script_log_warn "Querying port: ${querymethod}: ${queryip}:${queryport} : ${queryattempt} : FAIL"
# Send alert if enabled.
alert="restartquery"
alert.sh
command_restart.sh
fn_firstcommand_reset
core_exit.sh
if [ "${gdplayers}" ]; then
fn_script_log_info "Players: ${gdplayers}/${gdmaxplayers}"
fi
if [ "${gdbots}" ]; then
fn_script_log_info "Bots: ${gdbots}"
fi
if [ "${gdmap}" ]; then
fn_script_log_info "Map: ${gdmap}"
fi
if [ "${gdgamemode}" ]; then
fn_script_log_info "Game Mode: ${gdgamemode}"
fi

# send LinuxGSM stats if monitor is OK.
if [ "${stats}" == "on" ]||[ "${stats}" == "y" ]; then
info_stats.sh
fi

return 0
else
fn_print_warn_nl "${log_msg} querystatus=\"${querystatus}\""
fi
done
# Second counter will wait for 15s before breaking loop.
for seconds in {1..15}; do
fn_print_fail "Querying port: ${querymethod}: ${ip}:${queryport} : ${totalseconds}/${queryattempt} : ${cyan}WAIT${default}"
sleep 0.5
totalseconds=$((totalseconds + 1))
if [ "${seconds}" == "15" ]; then
break

# monitoring attempt failed, show details to resolve the issue:
if ! ss -tuplwn | grep -qFe ":${queryport} "; then
fn_print_warn_nl "Port is not in use right now \"${queryport}\". Check command details for ports, use provided command to check if every port is used + console to validate server is booted. Maybe server didn't boot, e.g. a second port which is needed is already in use by another application or the configuration for the queryport is incorrect."
else
# the return values of the following lines aren't used, so masking them is not an issue
#shellcheck disable=SC2155
local process_using_port="$( ss -tuplwn "( dport = :${queryport} or sport = :${queryport} )" | grep -o '[^ ]*$')"
#shellcheck disable=SC2155
local listen_on="$( ss -tuplwn "( dport = :${queryport} or sport = :${queryport} )" | grep -o "[^ ]*:${queryport} ")"

local msg="Found application \"${process_using_port}\" which listens on \"${listen_on}\""
if ! ss -tuplwn "( dport = :${queryport} or sport = :${queryport} )" | grep -qs '^[^ ]*\s*[^ ]*\s*0\s*'; then
fn_print_warn_nl "$msg but Recv-Q isn't empty. Server didn't read the message we send, e.g. server is booting, has an issue which prevents correct initialization or the port is in use by another program."
else
fn_print_info_nl "$msg and Recv-Q is empty, the application read our send message but didn't answer as expected. Maybe \"${queryport}\" is not the querypot or incorrect query method (e.g. gamedig protocol) used?"
fi
done
fi

# delay next init
if [ "${queryattempt}" != "${max_attempts}" ]; then
local explanation="e.g. maybe it failed because of server starting / map change / workshop download"
fn_print_info "delayed next attempt for ${wait_between_attempts}s, $explanation"
for i in $(seq 1 "${wait_between_attempts}"); do
sleep 1s
fn_print_info "delayed next attempt for $((wait_between_attempts - i))s, $explanation"
done
fn_print_info_nl "monitoring delayed for ${wait_between_attempts}s, $explanation"
fi
done
return 1
}

# Delays monitoring until at least querydelay minutes have passed since the
# timestamp stored on the first line of the lockfile.
# Globals:
#   querydelay (read/written) - delay in minutes; defaults to 1 when unset or empty
#   lockdir, selfname (read)  - locate the "${selfname}.lock" lockfile
#   last_execution, delay_seconds, next_allowed_execution,
#   seconds_to_wait (written) - intermediate values, kept global as before
# Returns:
#   0 always. When the lockfile is missing or its first line is not a plain
#   number, no delay is applied and the condition is left to the later
#   lockfile check, which runs after this function in this script.
fn_monitor_await_execution_time() {
	local lockfile="${lockdir}/${selfname}.lock"
	local i

	# Add a querydelay of 1 min if var missing.
	querydelay="${querydelay:-"1"}"
	seconds_to_wait="0"

	# No lockfile yet: nothing to wait for, and avoid a noisy `head` error.
	if [ ! -f "${lockfile}" ]; then
		return 0
	fi

	last_execution="$(head -n 1 "${lockfile}" 2> /dev/null)"
	# A non-numeric first line would break the arithmetic below.
	if [[ ! "${last_execution}" =~ ^[0-9]+$ ]]; then
		return 0
	fi

	delay_seconds="$((querydelay * 60))"
	# 10# forces base 10 so a value with leading zeros is not read as octal.
	next_allowed_execution="$((10#${last_execution} + delay_seconds))"
	seconds_to_wait="$((next_allowed_execution - $(date '+%s')))"

	if [ "${seconds_to_wait}" -gt "0" ]; then
		fn_print_dots "monitoring delayed for ${seconds_to_wait}s"
		for ((i = seconds_to_wait; i >= 1; i--)); do
			sleep 1s
			fn_print_info "monitoring delayed for ${i}s"
		done
		fn_print_info_nl "monitoring delayed for ${seconds_to_wait}s"
	fi
	return 0
}

fn_monitor_loop() {
is_gamedig_installed="$( command -v gamedig 2>/dev/null 1>&2 && command -v jq 2>/dev/null 1>&2 && echo true || echo false )"

# loop through query methods selected by querymode.
totalseconds=0
if [ "${querymode}" == "2" ]; then
local query_methods_array=(gamedig gsquery)
elif [ "${querymode}" == "3" ]; then
local query_methods_array=(gamedig)
elif [ "${querymode}" == "4" ]; then
local query_methods_array=(gsquery)
local query_methods_array=( gsquery )
elif [ "${querymode}" == "5" ]; then
local query_methods_array=(tcp)
local query_methods_array=( tcp )
else
fn_print_fail_nl "monitoring function invoced but querymode has an illegal value ${querymode}"
return 1
fi

for querymethod in "${query_methods_array[@]}"; do
# Will check if gamedig is installed and bypass if not.
if [ "${querymethod}" == "gamedig" ]; then
if [ "$(command -v gamedig 2> /dev/null)" ] && [ "$(command -v jq 2> /dev/null)" ]; then
if [ -z "${monitorpass}" ]; then
fn_monitor_query
fi
else
fn_script_log_info "gamedig is not installed"
fn_script_log_info "https://docs.linuxgsm.com/requirements/gamedig"
fi
else
# will not query if query already passed.
if [ -z "${monitorpass}" ]; then
fn_monitor_query
fi
if [ "${querymethod}" == "gamedig" ] && ! "${is_gamedig_installed}"; then
fn_print_warn_nl "gamedig is not installed"
fn_print_warn_nl "https://docs.linuxgsm.com/requirements/gamedig"
elif fn_monitor_query; then
fn_print_complete_nl "monitoring successful"
return 0
fi
done
return 1
}

monitorflag=1
Expand All @@ -228,18 +253,31 @@ core_logs.sh
info_game.sh

# query pre-checks
fn_monitor_await_execution_time
fn_monitor_check_lockfile
fn_monitor_check_update
fn_monitor_check_session
# Monitor will not continue if session only check.
if [ "${querymode}" != "1" ]; then
fn_monitor_check_queryport
check_only_if_running="$([ "${querymode}" == "1" ] && echo true || echo false )"

# Add a querydelay of 1 min if var missing.
if [ -z "${querydelay}" ]; then
querydelay="1"
fi
exitcode="1" # if not altered below, coding error => FATAL
if ! fn_monitor_is_server_running; then
fn__restart_server "restart"
exitcode="3"

# if monitor should only check the session
elif "${check_only_if_running}"; then
exitcode="0"

fn_monitor_loop
elif ! fn_monitor_is_queryport_valid; then
exitcode="2" # error because maybe unfixable
# no restart because config issue !

# server could be queried with tcp / gsquery / gamedig
elif fn_monitor_loop; then
exitcode="0"

else
fn__restart_server "restartquery"
exitcode="3"
fi

core_exit.sh
1 change: 1 addition & 0 deletions lgsm/functions/install_server_files.sh
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ fn_install_server_files() {
else
fn_print_fail_nl "Installing ${gamename} Server failed, missing default configuration"
fn_script_log_fatal "Installing ${gamename} Server failed, missing default configuration"
core_exit.sh
fi
fn_fetch_file "${remote_fileurl}" "" "" "" "${local_filedir}" "${local_filename}" "${chmodx}" "${run}" "${forcedl}" "${md5}"
fn_dl_extract "${local_filedir}" "${local_filename}" "${serverfiles}"
Expand Down