#!/bin/ksh93
#
# Very useful script to check LPAR credits (Henrik Morsing)
#
# Performance recommendation tool
#
# Copyright Henrik Morsing, 2022
#
# Initial version 1.0
# 09-11-2022 Henrik Morsing 1.1 Added more informative output
#                               and corrected when to alert (6 digits, not 5)

# Number of days the system has been up. Only uptime output containing the
# word "days" is considered, so ${ref} is empty when that word is absent.
ref="$(uptime | grep days | awk '{ print $3 }')"

# ${ref} is used as a divisor when the cumulative counters are scaled to a
# 90 day window, so an empty value is fatal. Below twenty days the script
# carries on but warns that the figures are not representative yet.
if [[ -z "${ref}" ]]
then
  echo "System uptime too low."
  exit 1
elif [[ "${ref}" -lt 20 ]]
then
  echo "System uptime too low to give accurate results."
fi

# Banner
echo
echo "Starting System Performance Analyser v1.0"
echo
echo "System Name: $(uname -n) - System Uptime Days: ${ref}"
echo
echo "Please bear in mind, as stats used are accumulated over time,"
echo "they can be a view of the past and issues may already have been rectified."
echo
echo
#####################
# MEMORY
#####################
echo "\t *** MEMORY CHECKS ***"
echo
echo "Add more memory to rectify these"
echo

# Read three cumulative counters from 'vmstat -s': paging space page outs,
# revolutions of the clock hand and free frame waits. The values are assigned
# positionally, i.e. in the order vmstat prints the matching lines.
vmstat -s \
  | grep -E 'paging space page outs|revolutions of the clock hand|free frame waits' \
  | awk '{ print $1 } ' \
  | tr '\n' ' ' \
  | read page_outs revolutions frame_waits

# A counter that could not be read is treated as zero instead of breaking
# the arithmetic below.
page_outs=${page_outs:-0}
revolutions=${revolutions:-0}
frame_waits=${frame_waits:-0}

# Convert to a 90 day reference. Multiply before dividing: with integer
# arithmetic, dividing by the uptime first truncates small daily rates to 0.
page_outs_90=$(( ${page_outs}*90/${ref} ))
revolutions_90=$(( ${revolutions}*90/${ref} ))
frame_waits_90=$(( ${frame_waits}*90/${ref} ))

# Severity is judged on the number of digits in the 90 day figures
page_outs_digits=${#page_outs_90}
frame_waits_digits=${#frame_waits_90}

# Report the highest severity tier that any counter reaches; inside a tier,
# list only the counters that actually crossed that tier's threshold.
# Clock hand revolutions are compared against the raw count per uptime day.
if [[ ${page_outs_digits} -gt 7 || ${revolutions} -gt $(( ${ref}*100 )) || ${frame_waits_digits} -gt 6 ]]
then
  echo "You are extremely memory constrained:"
  [[ ${page_outs_digits} -gt 7 ]] && echo "- \033[1;31m'paging space page outs' is extremely high:\033[m ${page_outs} -> ${page_outs_90} per 90 days (${page_outs_digits} digits)"
  [[ ${revolutions} -gt $(( ${ref}*100 )) ]] && echo "- \033[1;31m'revolutions of the clock hand' is extremely high:\033[m ${revolutions} -> ${revolutions_90} per 90 days"
  [[ ${frame_waits_digits} -gt 6 ]] && echo "- \033[1;31m'free frame waits' is extremely high:\033[m ${frame_waits} -> ${frame_waits_90} per 90 days (${frame_waits_digits} digits)"
elif [[ ${page_outs_digits} -gt 6 || ${revolutions} -gt $(( ${ref}*10 )) || ${frame_waits_digits} -gt 5 ]]
then
  echo "You are very memory constrained:"
  [[ ${page_outs_digits} -gt 6 ]] && echo "- \033[1;33m'paging space page outs' is very high:\033[m ${page_outs} -> ${page_outs_90} per 90 days (${page_outs_digits} digits)"
  [[ ${revolutions} -gt $(( ${ref}*10 )) ]] && echo "- \033[1;33m'revolutions of the clock hand' is very high:\033[m ${revolutions} -> ${revolutions_90} per 90 days"
  [[ ${frame_waits_digits} -gt 5 ]] && echo "- \033[1;33m'free frame waits' is very high:\033[m ${frame_waits} -> ${frame_waits_90} per 90 days (${frame_waits_digits} digits)"
elif [[ ${page_outs_digits} -gt 5 || ${revolutions} -gt ${ref} || ${frame_waits_digits} -gt 4 ]]
then
  echo "You could benefit from adding more memory:"
  [[ ${page_outs_digits} -gt 5 ]] && echo "- 'paging space page outs' is high: ${page_outs} -> ${page_outs_90} per 90 days (${page_outs_digits} digits)"
  [[ ${revolutions} -gt ${ref} ]] && echo "- 'revolutions of the clock hand' is high: ${revolutions} -> ${revolutions_90} per 90 days"
  [[ ${frame_waits_digits} -gt 4 ]] && echo "- 'free frame waits' is high: ${frame_waits} -> ${frame_waits_90} per 90 days (${frame_waits_digits} digits)"
fi
#####################
# PROCESSOR
#####################
echo
echo "\t *** PROCESSOR CHECKS ***"
echo

# Checking for LPAR SRAD spreading: count the 'lssrad -a' lines that do not
# contain the string SRAD.
num_srads="$(lssrad -a | grep -v SRAD | wc -l)"

# Run lparstat once and pick the individual fields out of the saved output.
lpar_info="$(lparstat -i)"
# ".0" is appended so the ratio below is evaluated in floating point.
vCPUs_online="$(print -r -- "${lpar_info}" | grep 'Online Virtual CPUs' | awk '{ print $NF }').0"
vCPUs_max="$(print -r -- "${lpar_info}" | grep "Maximum Virtual CPUs" | awk '{ print $NF }')"
# Lines mentioning "Pool" are excluded so only the entitled capacity line
# without that word is used.
Entitlement="$(print -r -- "${lpar_info}" | grep "Entitled Capacity" | grep -v "Pool" | awk '{ print $NF }')"

if [[ ${num_srads} -gt "2" ]]
then
  echo "LPAR is spread across multiple SRADs (${num_srads}). If memory (2TB?) and max processor allocations (less than 15 vCPUs, currently ${vCPUs_max}) suggests it can be contained within one SRAD, powering the LPAR off and on again might align it correctly."
fi

echo
printf "*** Checking spreading factor ***"
if [[ ${vCPUs_online} -gt "1" ]]
then
  # Spreading factor = online virtual processors per unit of entitlement.
  # Left at 0 (reported as OK) when the entitlement could not be read.
  spreading=0
  if [[ -n "${Entitlement}" && ${Entitlement} -gt 0 ]]
  then
    spreading=$(( ${vCPUs_online}/${Entitlement} ))
  fi

  if [[ ${spreading} -gt 2 ]]
  then
    echo "\t[\033[1;33mWARNING\033[m]"
    echo "Number of virtual processors is high compared to entitlement."
  else
    echo "\t[\033[1;32mOK\033[m]"
  fi
else
  # Check not applicable; just terminate the heading line printed above.
  echo
fi
#####################
# I/O
#####################
# Starting from the top, VGs first
echo
echo "\t *** I/O CHECKS ***"
echo

# Becomes true as soon as any I/O buffer warning is issued; used later to
# decide whether to print the link to the explanatory web page.
url=false

for volgroup in $(lsvg -o)
do
  printf "*** Checking %s ***" "${volgroup}"

  ##################
  # Checking pbufs #
  ##################
  # Count blocked I/Os with no pbuf
  pervg_blocked_io_count=$(/usr/sbin/lvmo -v "${volgroup}" -o pervg_blocked_io_count)

  # Reference to 90 days. Multiply before dividing so integer division does
  # not truncate small daily rates to zero.
  pbio_90=$(( ${pervg_blocked_io_count:-0}*90/${ref} ))

  # Find number of digits
  pbio_digits=${#pbio_90}

  # Recommendation based on number of digits
  if [[ ${pbio_digits} -gt 6 ]]
  then
    url=true
    echo "\t[\033[1;33mWARNING\033[m]"

    # Recommended pv_pbuf_count for the VG: current setting plus 16384
    pbuf_curr=$(/usr/sbin/lvmo -v "${volgroup}" -o pv_pbuf_count)
    pbuf_vg=$(( ${pbuf_curr}+16384 ))

    echo "Volume group ${volgroup} is extremely low on pbufs"
    # Report the blocked I/O counter itself, not the current pbuf setting.
    echo "- \033[1;31m'pending disk I/Os blocked with no pbuf' is extremely high:\033[m ${pervg_blocked_io_count} -> ${pbio_90} per 90 days (${pbio_digits} digits). Increase 'pv_pbuf_count' from ${pbuf_curr} to ${pbuf_vg}.\n"
  else
    echo "\t[\033[1;32mOK\033[m]"
  fi
done
###################
# Checking psbufs #
###################
# Read two cumulative counters from 'vmstat -v': paging space I/Os blocked
# with no psbuf and external pager filesystem I/Os blocked with no fsbuf.
# The values are assigned positionally, in the order vmstat prints them.
vmstat -v \
  | grep -E 'paging space I/Os blocked with no psbuf|external pager filesystem I/Os blocked with no fsbuf' \
  | awk '{ print $1 } ' \
  | tr '\n' ' ' \
  | read psbuf fsbuf

# Reference to 90 days. Multiply before dividing: the thresholds here are
# small, so truncating the daily rate first would hide real problems.
psio_90=$(( ${psbuf:-0}*90/${ref} ))

# More than one digit, i.e. ten or more blocked psbufs per 90 days, is bad
if [[ ${#psio_90} -gt 1 ]]
then
  url=true
  printf "[\033[1;33mWARNING\033[m] "
  echo "\033[1;31mpsbufs is above 10\033[m, indicating severe memory restriction causing excessive paging. If you cannot add memory, alleviate by adding parallel paging spaces."
fi

###################
# Checking fsbufs #
###################
echo
# Reference to 90 days
fsio_90=$(( ${fsbuf:-0}*90/${ref} ))

# More than two digits, i.e. a hundred or more blocked fsbufs per 90 days,
# is bad
if [[ ${#fsio_90} -gt 2 ]]
then
  url=true
  printf "[\033[1;33mWARNING\033[m] "
  echo "\033[1;31mfsbufs is above 100\033[m, indicating filesystem I/O over-load. Increase j2_dynamicBufferPreallocation with ioo to fix this. Start by doubling value."
  echo "Also consider splitting into smaller file systems."
fi

# Link to helpful web page, then clear the flag so later sections do not
# mistake an I/O buffer warning for one of their own.
[[ "${url}" == "true" ]] && echo "Info on I/O buffers: https://www.ibm.com/support/pages/blocked-ios-due-buffers-shortage"
url=false
###################
# Fibre Adapters  #
###################
# Names of all adapter devices whose lsdev line contains "fcs"
adapters=$(lsdev -Ccadapter | grep fcs | awk '{ print $1 }')

# Start from a clean flag so only warnings raised in this section trigger
# the fcs link below.
url=false

# Check No Command Resource Count (Update num_cmd_elems)
for adapter in ${adapters}
do
  ncrc=$(fcstat -D "${adapter}" | grep "No Command Resource Count" | awk '{ print $NF }')

  # Reference to 90 days. Multiply before dividing so integer division does
  # not truncate small daily rates to zero.
  ncrc_90=$(( ${ncrc:-0}*90/${ref} ))

  # Not sure how many is bad, let's start with 6 digits:
  # more than 6 digits is "extremely" high, more than 5 is "very" high.
  severity=""
  if [[ ${#ncrc_90} -gt 6 ]]
  then
    severity="extremely"
  elif [[ ${#ncrc_90} -gt 5 ]]
  then
    severity="very"
  fi

  if [[ -n "${severity}" ]]
  then
    url=true
    printf "[\033[1;33mWARNING\033[m] "
    echo "- \033[1;31mNo Command Resource Count for adapter ${adapter} is ${severity} high:\033[m ${ncrc} -> ${ncrc_90} per 90 days (${#ncrc_90} digits)"
    echo "Increase num_cmd_elems on ${adapter} to fix, but not higher than num_cmd_elems on the VIO physical adapter."
  fi
done

# Link to helpful web page.
[[ "${url}" == "true" ]] && echo "Info on fcs buffers: https://www.ibm.com/support/pages/no-command-resource-count-and-high-water-mark-active-and-pending-commands"
url=false
echo
# Check High water mark of active/pending commands (Update num_cmd_elems)
for adapter in ${adapters}
do
  # Fetch the "FC SCSI Adapter Driver Queue" paragraph once and read both
  # high water marks from it.
  queue_stats=$(fcstat -D "${adapter}" | grep -p "FC SCSI Adapter Driver Queue")
  hwmac=$(print -r -- "${queue_stats}" | grep "High water mark of active commands" | awk '{ print $NF }')
  hwmpc=$(print -r -- "${queue_stats}" | grep "High water mark of pending commands" | awk '{ print $NF }')

  # The sum of both marks is compared against the adapter's queue setting
  hwm_summ=$(( ${hwmac:-0} + ${hwmpc:-0} ))

  # We need the current num_cmd_elems setting of the adapter being checked
  nce=$(lsattr -El "${adapter}" -a num_cmd_elems -F value)

  if [[ ${hwm_summ} -gt ${nce} ]]
  then
    url=true
    printf "[\033[1;33mWARNING\033[m] "
    echo "- \033[1;31mHigh water mark for active/pending command for adapter ${adapter} is higher than num_cmd_elems:\033[m ${hwm_summ} vs. ${nce}"
    echo "Increase num_cmd_elems on ${adapter} to fix, but not higher than num_cmd_elems on the VIO physical adapter."
  fi
done

# Link to helpful web page.
echo
[[ "${url}" == "true" ]] && echo "Info on fcs buffers: https://www.ibm.com/support/pages/no-command-resource-count-and-high-water-mark-active-and-pending-commands"
url=false
echo
# Check No DMA Resource Count (Update max_xfer_size)
# Start from a clean flag so only warnings raised in this loop trigger the
# link below.
url=false
for adapter in ${adapters}
do
  nodma=$(fcstat -D "${adapter}" | grep "No DMA Resource Count" | awk '{ print $NF }')

  # Reference to 90 days. Multiply before dividing: the threshold is only
  # four digits, so truncating the daily rate first would skew the result.
  nodma_90=$(( ${nodma:-0}*90/${ref} ))

  if [[ ${#nodma_90} -gt 3 ]]
  then
    url=true
    printf "[\033[1;33mWARNING\033[m] "
    echo "- \033[1;31mNo DMA Resource Count for adapter ${adapter} is higher than 3 digits per 90 days:\033[m ${nodma_90}"
    echo "Increase max_xfer_size on ${adapter} to fix, but not higher than max_xfer_size on the VIO physical adapter."
  fi
done

# Link to helpful web page.
echo
[[ "${url}" == "true" ]] && echo "Info on fcs buffers: https://www.ibm.com/support/pages/no-command-resource-count-and-high-water-mark-active-and-pending-commands"
url=false
echo
exit 0