Here is the final version of my script that will watch for disconnected
client machines and kill their associated server side processes.
If I'm doing anything really stupid I would appreciate hearing about it.
If it's useful to anyone, feel free to pass it along.
Thanks,
Mark
#!/bin/sh
# the next line restarts using tclsh \
exec tclsh8.3 "$0" "$@"
# pg-watchdog -- make sure postgres clients are reachable.
#
# This program is a watchdog for postgresql. It monitors the
# server-side backend process of every postgresql client and kills
# that backend if the client's host becomes inaccessible over the network.
#
# This is necessary because if a machine goes down "hard" or
# if a network cable becomes disconnected, it can be several
# hours before postgres will be informed of this (for details,
# see chapter 23 of W. Richard Stevens' TCP/IP Illustrated,
# volume 1). If a postgres client on that machine has a
# transaction lock, all other clients will be blocked waiting
# for that lock to be released.
#
# This program uses the Scotty package available at:
# http://wwwhome.cs.utwente.nl/~schoenw/scotty
# If you do not wish to install Scotty you can change
# the icmp command to instead use whatever flavor of
# ping is available on your system.
#
# Please send feedback, improvements, etc, to
# Mark Harrison, mh@pixar.com
package require Tnm
# doit -- run one watchdog pass and schedule the next one.
#
# Takes no arguments and returns nothing useful. Each pass:
#   1. lists the local "postgres:" processes with ps/awk,
#   2. groups their pids by the client address shown in the process title,
#   3. pings every distinct address with the Tnm icmp command,
#   4. kills every pid whose address came back with a time of -1.
#
# Side effects: runs ps, awk and kill as child processes, sends ICMP
# echo requests, writes a line to stdout for each kill attempt and a
# line to stderr for each kill that fails.
proc doit {} {
    # Re-arm the timer before doing any work, so that an error raised
    # further down cannot stop the 60-second cycle.
    after 60000 doit

    # awk selects the ps lines whose 11th field is "postgres:" and prints
    # field 2 and field 14 joined by a slash. The code below treats the
    # first part as the pid and the second as the client address; the
    # field positions depend on the local ps output format.
    set lines [exec ps auxww | awk {$11=="postgres:" {print $2 "/" $14}}]

    # Build pids(address) -> list of pids. The array is created by the
    # first lappend; no pre-creation is needed because array size
    # returns 0 when the array does not exist.
    foreach s [split $lines \n] {
        set x [split $s /]
        set pid [lindex $x 0]
        set ip [lindex $x 1]
        # Only dotted-quad addresses are pinged; anything else, such as
        # an empty field, is ignored.
        if {[regexp {[0-9]+[.][0-9]+[.][0-9]+[.][0-9]+} $ip]} {
            lappend pids($ip) $pid
        }
    }

    # Nothing to ping when no backend has a remote address.
    if {[array size pids] == 0} {
        return
    }

    # icmp echo is used here as returning a list of address/time pairs.
    # NOTE(review): a time of -1 is taken to mean "no reply"; confirm
    # against the installed Tnm version, since a different marker would
    # make this check never fire.
    set times [icmp -timeout 20 echo [lsort [array names pids]]]
    foreach pair $times {
        set ip [lindex $pair 0]
        set time [lindex $pair 1]
        # Skip reachable hosts, and any address icmp reports that was
        # not one of the addresses collected above.
        if {$time != -1 || ![info exists pids($ip)]} {
            continue
        }
        foreach pid $pids($ip) {
            puts "killing $pid from $ip"
            # exec raises a Tcl error when kill exits non-zero, for
            # example when the process has already gone away. Catch it
            # so one failure does not abort the remaining kills.
            if {[catch {exec kill $pid} err]} {
                puts stderr "could not kill $pid from $ip: $err"
            }
        }
    }
}
# Queue the first watchdog pass; it runs once the event loop below starts,
# and doit re-schedules itself from then on.
after 0 doit
# Enter the Tcl event loop so the "after" timers fire. Nothing visible in
# this script sets the variable "forever", so this is expected to block
# until the process is terminated.
vwait forever
--
Mark Harrison
Pixar Animation Studios