| 1 | # Hey Emacs, this is a -*- shell-script -*- !!!
|
|---|
| 2 |
|
|---|
| 3 | # utility functions for ctdb event scripts
|
|---|
| 4 |
|
|---|
# This file is meant to be sourced with CTDB_BASE already set; bail
# out immediately otherwise.
[ -n "$CTDB_BASE" ] || {
    echo 'CTDB_BASE unset in CTDB functions file'
    exit 1
}

CTDB_VARDIR="/usr/local/var/lib/ctdb"
ctdb_rundir="/usr/local/var/run/ctdb"

# Only (and always) override these variables in test code

# Each default below is applied only when the variable is unset or empty.
[ -n "$CTDB_SCRIPT_VARDIR" ] || CTDB_SCRIPT_VARDIR="/usr/local/var/lib/ctdb/state"

[ -n "$CTDB_SYS_ETCDIR" ] || CTDB_SYS_ETCDIR="/etc"

[ -n "$CTDB_HELPER_BINDIR" ] || CTDB_HELPER_BINDIR="/usr/local/libexec/ctdb"
| 26 |
|
|---|
| 27 | #######################################
|
|---|
| 28 | # pull in a system config file, if any
|
|---|
| 29 |
|
|---|
rewrite_ctdb_options ()
{
    # Translate a CTDB_DBDIR of "tmpfs" or "tmpfs:<mount options>" into
    # a real directory under $ctdb_rundir plus a mount option string in
    # CTDB_DBDIR_TMPFS_OPTIONS.  Any other CTDB_DBDIR is left untouched
    # and the option string is emptied.
    CTDB_DBDIR_TMPFS_OPTIONS=""

    case "$CTDB_DBDIR" in
    tmpfs)
        _opts=""
        ;;
    tmpfs:*)
        # Get any extra options specified after colon
        _opts="${CTDB_DBDIR#tmpfs:}"
        ;;
    *)
        return 0
        ;;
    esac

    _opts_defaults="mode=700"

    # This is an internal variable, only used by ctdbd_wrapper.
    # It is OK to repeat mount options - last value wins
    CTDB_DBDIR_TMPFS_OPTIONS="${_opts_defaults}${_opts:+,${_opts}}"

    CTDB_DBDIR="${ctdb_rundir}/CTDB_DBDIR"
}
| 51 |
|
|---|
_loadconfig() {
    # Source the configuration for service $1.
    #
    # With no argument, the name is taken from $service_config or
    # $service_name and loading restarts via loadconfig() with that name.
    # For any name other than "ctdb" the "ctdb" configuration is loaded
    # first.  The first existing file among
    #   $CTDB_SYS_ETCDIR/sysconfig/$1
    #   $CTDB_SYS_ETCDIR/default/$1
    #   $CTDB_BASE/sysconfig/$1
    # is sourced.  For "ctdb", $CTDB_BASE/ctdbd.conf is sourced as well
    # (if readable) and rewrite_ctdb_options is run afterwards.

    if [ -z "$1" ] ; then
        foo="${service_config:-${service_name}}"
        if [ -n "$foo" ] ; then
            loadconfig "$foo"
            return
        fi
    fi

    if [ "$1" != "ctdb" ] ; then
        loadconfig "ctdb"
    fi

    if [ -z "$1" ] ; then
        return
    fi

    # Paths are quoted so that directories containing whitespace or
    # glob characters are tested and sourced as a single word.
    if [ -f "$CTDB_SYS_ETCDIR/sysconfig/$1" ]; then
        . "$CTDB_SYS_ETCDIR/sysconfig/$1"
    elif [ -f "$CTDB_SYS_ETCDIR/default/$1" ]; then
        . "$CTDB_SYS_ETCDIR/default/$1"
    elif [ -f "$CTDB_BASE/sysconfig/$1" ]; then
        . "$CTDB_BASE/sysconfig/$1"
    fi

    if [ "$1" = "ctdb" ] ; then
        _config="${CTDB_BASE}/ctdbd.conf"
        if [ -r "$_config" ] ; then
            . "$_config"
        fi
        rewrite_ctdb_options
    fi
}
| 86 |
|
|---|
loadconfig ()
{
    # Public entry point: forwards every argument, unchanged, to
    # _loadconfig().
    _loadconfig "$@"
}
| 90 |
|
|---|
| 91 | ##############################################################
|
|---|
| 92 |
|
|---|
| 93 | # CTDB_SCRIPT_DEBUGLEVEL can be overwritten by setting it in a
|
|---|
| 94 | # configuration file.
|
|---|
debug ()
{
    # Emit a debug message when CTDB_SCRIPT_DEBUGLEVEL (default 2) is
    # at least 4.  The message is the arguments if any were given,
    # otherwise stdin, which allows lots of debug to be passed using a
    # here document.  Below the threshold, stdin is still drained when
    # no arguments were given.
    [ ${CTDB_SCRIPT_DEBUGLEVEL:-2} -ge 4 ] || {
        [ -n "$1" ] || cat >/dev/null
        return
    }

    case "$1" in
    "")
        sed -e 's@^@DEBUG: @'
        ;;
    *)
        echo "DEBUG: $*"
        ;;
    esac
}
| 112 |
|
|---|
die ()
{
    # Print message $1 on stdout and exit the shell with status $2
    # (default 1).
    _msg="$1"
    _rc="${2:-1}"

    echo "$_msg" ; exit $_rc
}
| 121 |
|
|---|
| 122 | # Log given message or stdin to either syslog or a CTDB log file
|
|---|
| 123 | # $1 is the tag passed to logger if syslog is in use.
|
|---|
script_log ()
{
    # Log a message to the destination selected by $CTDB_LOGGING.
    #
    # $1 is the tag, used only when logging via logger(1).  The
    # remaining arguments form the message; with no message, stdin is
    # logged instead.
    #
    # CTDB_LOGGING="file:<path>" appends to <path>, an empty
    # CTDB_LOGGING appends to the default log file, anything else is
    # handed to logger.
    _tag="$1" ; shift

    case "$CTDB_LOGGING" in
    file:*|"")
        if [ -n "$CTDB_LOGGING" ] ; then
            _file="${CTDB_LOGGING#file:}"
        else
            _file="/usr/local/var/log/log.ctdb"
        fi
        {
            if [ -n "$*" ] ; then
                echo "$*"
            else
                cat
            fi
        } >>"$_file"
        ;;
    *)
        # Handle all syslog:* variants here too.  There's no tool to do
        # the lossy things, so just use logger.
        #
        # The message is passed as one quoted word so that it is not
        # word-split or glob-expanded.  With no message, logger is run
        # without one so that it reads stdin.
        if [ -n "$*" ] ; then
            logger -t "ctdbd: ${_tag}" -- "$*"
        else
            logger -t "ctdbd: ${_tag}"
        fi
        ;;
    esac
}
| 150 |
|
|---|
| 151 | # When things are run in the background in an eventscript then logging
|
|---|
| 152 | # output might get lost. This is the "solution". :-)
|
|---|
background_with_logging ()
{
    # Run the command given as arguments in the background, with stdin
    # from /dev/null and stdout+stderr piped into script_log under the
    # tag "${script_name}&".  Always returns 0 without waiting.
    {
        "$@" 2>&1 </dev/null | script_log "${script_name}&"
    } &

    return 0
}
| 162 |
|
|---|
| 163 | ##############################################################
|
|---|
| 164 | # check number of args for different events
|
|---|
ctdb_check_args ()
{
    # Validate the argument count for the event named in $1; the count
    # includes the event name itself.  Prints an error and exits 1 on a
    # mismatch.  Events not listed here are not checked.
    case "$1" in
    takeip|releaseip)
        _nargs=4
        _err="ERROR: must supply interface, IP and maskbits"
        ;;
    updateip)
        _nargs=5
        _err="ERROR: must supply old interface, new interface, IP and maskbits"
        ;;
    *)
        return 0
        ;;
    esac

    [ $# -eq $_nargs ] || {
        echo "$_err"
        exit 1
    }
}
| 182 |
|
|---|
| 183 | ##############################################################
|
|---|
| 184 | # determine on what type of system (init style) we are running
|
|---|
detect_init_style()
{
    # Set CTDB_INIT_STYLE to "suse", "debian" or "redhat" depending on
    # which init helper binary is installed.  Does nothing when the
    # variable is already set.
    #
    # Returns 0 in both cases; a pre-configured style is not an error.
    if [ -n "$CTDB_INIT_STYLE" ] ; then
        return 0
    fi

    if [ -x /sbin/startproc ]; then
        CTDB_INIT_STYLE="suse"
    elif [ -x /sbin/start-stop-daemon ]; then
        CTDB_INIT_STYLE="debian"
    else
        CTDB_INIT_STYLE="redhat"
    fi
}
| 198 |
|
|---|
| 199 | ######################################################
|
|---|
| 200 | # simulate /sbin/service on platforms that don't have it
|
|---|
| 201 | # _service() makes it easier to hook the service() function for
|
|---|
| 202 | # testing.
|
|---|
_service ()
{
    # Run operation $2 (e.g. start/stop) on service $1 using the first
    # available mechanism: /sbin/service, /usr/sbin/service, or the
    # init script under $CTDB_SYS_ETCDIR/init.d or
    # $CTDB_SYS_ETCDIR/rc.d/init.d.  The command is prefixed with
    # $_nice, which service()/nice_service() set before calling.
    _service_name="$1"
    _op="$2"

    # do nothing, when no service was specified
    if [ -z "$_service_name" ] ; then
        return 0
    fi

    # $_nice is intentionally unquoted: when empty it must expand to
    # no word at all.  Script paths are quoted so that unusual
    # directory or service names stay a single word.
    if [ -x /sbin/service ]; then
        $_nice /sbin/service "$_service_name" "$_op"
    elif [ -x /usr/sbin/service ]; then
        $_nice /usr/sbin/service "$_service_name" "$_op"
    elif [ -x "$CTDB_SYS_ETCDIR/init.d/$_service_name" ]; then
        $_nice "$CTDB_SYS_ETCDIR/init.d/$_service_name" "$_op"
    elif [ -x "$CTDB_SYS_ETCDIR/rc.d/init.d/$_service_name" ]; then
        $_nice "$CTDB_SYS_ETCDIR/rc.d/init.d/$_service_name" "$_op"
    fi
}
| 221 |
|
|---|
service()
{
    # Run _service with an empty $_nice, i.e. without any command
    # prefix.
    _nice="" ; _service "$@"
}
| 227 |
|
|---|
| 228 | ######################################################
|
|---|
| 229 | # simulate /sbin/service (niced) on platforms that don't have it
|
|---|
nice_service()
{
    # Run _service with $_nice set to "nice", so the service command
    # is run under nice(1).
    _nice="nice" ; _service "$@"
}
| 235 |
|
|---|
| 236 | ######################################################
|
|---|
| 237 | # Cached retrieval of PNN from local node. This never changes so why
|
|---|
| 238 | # open a client connection to the server each time this is needed?
|
|---|
| 239 | # This sets $pnn - this avoids an unnecessary subprocess.
|
|---|
ctdb_get_pnn ()
{
    # Set $pnn from the cache file ${CTDB_SCRIPT_VARDIR}/my-pnn.  The
    # file is filled from "ctdb pnn", keeping what follows the last
    # ':' on each output line.
    _pnn_file="${CTDB_SCRIPT_VARDIR}/my-pnn"

    # Use -s rather than -f: if "ctdb pnn" failed on an earlier call
    # the redirection still created an empty file.  Treat that as
    # "not cached yet" instead of caching an empty PNN forever.
    if [ ! -s "$_pnn_file" ] ; then
        ctdb pnn | sed -e 's@.*:@@' >"$_pnn_file"
    fi

    read pnn <"$_pnn_file"
}
| 249 |
|
|---|
| 250 | ######################################################
|
|---|
| 251 | # wrapper around /proc/ settings to allow them to be hooked
|
|---|
| 252 | # for testing
|
|---|
| 253 | # 1st arg is relative path under /proc/, 2nd arg is value to set
|
|---|
set_proc ()
{
    # Write value $2 to /proc/$1.
    _proc_path="/proc/$1"
    echo "$2" >"$_proc_path"
}
| 258 |
|
|---|
set_proc_maybe ()
{
    # Like set_proc, but silently succeeds without writing when
    # /proc/$1 is not writable.
    [ ! -w "/proc/$1" ] || set_proc "$1" "$2"
}
| 265 |
|
|---|
| 266 | ######################################################
|
|---|
| 267 | # wrapper around getting file contents from /proc/ to allow
|
|---|
| 268 | # this to be hooked for testing
|
|---|
| 269 | # 1st arg is relative path under /proc/
|
|---|
get_proc ()
{
    # Print the contents of /proc/$1 on stdout.
    _proc_path="/proc/$1"
    cat "$_proc_path"
}
| 274 |
|
|---|
| 275 | ######################################################
|
|---|
| 276 | # Print up to $_max kernel stack traces for processes named $_prog
|
|---|
program_stack_traces ()
{
    # Print kernel stack traces, read via get_proc "<pid>/stack", for
    # processes named $1.  At most $2 (default 1) traces are printed;
    # processes whose stack cannot be read or is empty are skipped and
    # do not count.
    _prog="$1"
    _max="${2:-1}"

    _count=1
    for _pid in $(pidof "$_prog") ; do
        if ! [ $_count -le $_max ] ; then
            break
        fi

        # Do this first to avoid racing with process exit
        _stack=$(get_proc "${_pid}/stack" 2>/dev/null)
        [ -n "$_stack" ] || continue

        echo "Stack trace for ${_prog}[${_pid}]:"
        echo "$_stack"
        _count=$(($_count + 1))
    done
}
| 295 |
|
|---|
| 296 | ######################################################
|
|---|
| 297 | # Ensure $service_name is set
|
|---|
assert_service_name ()
{
    # Abort via die() unless $service_name is non-empty.
    if [ -z "$service_name" ] ; then
        die 'INTERNAL ERROR: $service_name not set'
    fi
}
| 302 |
|
|---|
| 303 | ######################################################
|
|---|
| 304 | # check a set of directories is available
|
|---|
| 305 | # return 1 on a missing directory
|
|---|
| 306 | # directories are read from stdin
|
|---|
| 307 | ######################################################
|
|---|
ctdb_check_directories_probe()
{
    # Read directory names from stdin, one per line, and return 1 at
    # the first one that is not an existing directory.  Lines
    # containing '%' are skipped.
    #
    # The name being checked is left in $d, which
    # ctdb_check_directories() uses in its error message.
    #
    # IFS="" keeps leading/trailing whitespace and -r keeps
    # backslashes, so each line is tested exactly as written.
    while IFS="" read -r d ; do
        case "$d" in
        *%*)
            continue
            ;;
        *)
            [ -d "${d}/." ] || return 1
        esac
    done
}
| 320 |
|
|---|
| 321 | ######################################################
|
|---|
| 322 | # check a set of directories is available
|
|---|
| 323 | # directories are read from stdin
|
|---|
| 324 | ######################################################
|
|---|
ctdb_check_directories()
{
    # Check the directories listed on stdin using
    # ctdb_check_directories_probe.  On failure print an error naming
    # the directory the probe left in $d, then exit 1.
    if ! ctdb_check_directories_probe ; then
        echo "ERROR: $service_name directory \"$d\" not available"
        exit 1
    fi
}
| 332 |
|
|---|
| 333 | ######################################################
|
|---|
| 334 | # check a set of tcp ports
|
|---|
| 335 | # usage: ctdb_check_tcp_ports <ports...>
|
|---|
| 336 | ######################################################
|
|---|
| 337 |
|
|---|
| 338 | # This flag file is created when a service is initially started. It
|
|---|
| 339 | # is deleted the first time TCP port checks for that service succeed.
|
|---|
| 340 | # Until then ctdb_check_tcp_ports() prints a more subtle "error"
|
|---|
| 341 | # message if a port check fails.
|
|---|
_ctdb_check_tcp_common ()
{
    # Set $_ctdb_service_started_file to the "<service_name>.started"
    # flag file under ${CTDB_SCRIPT_VARDIR}/failcount (also left in
    # $_d).  Requires $service_name.
    assert_service_name

    _d="${CTDB_SCRIPT_VARDIR}/failcount"
    _ctdb_service_started_file="${_d}/${service_name}.started"
}
| 348 |
|
|---|
ctdb_check_tcp_init ()
{
    # Create the "service started" flag file, making its parent
    # directory first if necessary.
    _ctdb_check_tcp_common

    _started_dir="${_ctdb_service_started_file%/*}" # dirname
    mkdir -p "$_started_dir"
    touch "$_ctdb_service_started_file"
}
| 355 |
|
|---|
| 356 | # Check whether something is listening on all of the given TCP ports
|
|---|
| 357 | # using the "ctdb checktcpport" command.
|
|---|
ctdb_check_tcp_ports()
{
    # Check that something is listening on every TCP port given as an
    # argument, using "ctdb checktcpport <port>":
    #   exit 98    - could not bind, so the port is in use (good)
    #   exit 0     - could bind, so nothing is listening: return 1
    #   other exit - unexpected failure: return that status
    #
    # While the "service started" flag file still exists a dead port
    # is reported as INFO rather than ERROR.  The flag file is removed
    # once all ports have been found listening.
    if [ -z "$1" ] ; then
        echo "INTERNAL ERROR: ctdb_check_tcp_ports - no ports specified"
        exit 1
    fi

    for _p ; do  # process each function argument (port)
        _out=$(ctdb checktcpport "$_p" 2>&1)
        _ret=$?
        case "$_ret" in
        0)
            _ctdb_check_tcp_common
            if [ ! -f "$_ctdb_service_started_file" ] ; then
                echo "ERROR: $service_name tcp port $_p is not responding"
                debug "\"ctdb checktcpport $_p\" was able to bind to port"
            else
                echo "INFO: $service_name tcp port $_p is not responding"
            fi

            return 1
            ;;
        98)
            # Couldn't bind, something already listening, next port...
            continue
            ;;
        *)
            echo "ERROR: unexpected error running \"ctdb checktcpport\""
            # Here-document text is emitted literally, so nothing but
            # the captured output may follow $_out.
            debug <<EOF
ctdb checktcpport (exited with $_ret) with output:
$_out
EOF
            return $_ret
        esac
    done

    # All ports listening
    _ctdb_check_tcp_common
    rm -f "$_ctdb_service_started_file"
    return 0
}
| 400 |
|
|---|
| 401 | ######################################################
|
|---|
| 402 | # check a unix socket
|
|---|
| 403 | # usage: ctdb_check_unix_socket <socket_path>
|
|---|
| 404 | ######################################################
|
|---|
ctdb_check_unix_socket() {
    # Check, using netstat, that a unix socket whose line ends in the
    # path $1 is in a listening state.  An empty path is accepted
    # without checking.  Prints an error and returns 1 when no such
    # socket is found.
    socket_path="$1"
    if [ -z "$socket_path" ] ; then
        return 0
    fi

    netstat --unix -a -n | grep -q "^unix.*LISTEN.*${socket_path}$" && return 0

    echo "ERROR: $service_name socket $socket_path not found"
    return 1
}
| 414 |
|
|---|
| 415 | ######################################################
|
|---|
| 416 | # check a command returns zero status
|
|---|
| 417 | # usage: ctdb_check_command <command>
|
|---|
| 418 | ######################################################
|
|---|
ctdb_check_command ()
{
    # Run the command given as arguments, capturing stdout+stderr.  On
    # a non-zero status print an error, pass the captured output to
    # debug() and exit 1.
    if _out=$("$@" 2>&1) ; then
        return 0
    fi

    echo "ERROR: $* returned error"
    echo "$_out" | debug
    exit 1
}
| 427 |
|
|---|
| 428 | ################################################
|
|---|
| 429 | # kill off any TCP connections with the given IP
|
|---|
| 430 | ################################################
|
|---|
kill_tcp_connections ()
{
    # Kill every established TCP connection to local IP $1.
    #
    # Each connection is sent to "ctdb killtcp" as "<src> <dst>" and,
    # unless one-way killing applies, also as "<dst> <src>".  One-way
    # applies when $2 is "oneway" or the local port is 139 or 445.
    # Afterwards, wait (polling up to 4 times, 1 second apart) for the
    # connections to disappear.
    _ip="$1"

    case "$2" in
    oneway) _oneway=true ;;
    *)      _oneway=false ;;
    esac

    get_tcp_connections_for_ip "$_ip" | {
        _killcount=0
        _connections=""
        _nl="
"
        while read _dst _src; do
            _destport="${_dst##*:}"
            case $_destport in
            # we only do one-way killtcp for CIFS
            139|445) __oneway=true ;;
            *)       __oneway=$_oneway ;;
            esac

            echo "Killing TCP connection $_src $_dst"
            _connections="${_connections}${_nl}${_src} ${_dst}"
            $__oneway || _connections="${_connections}${_nl}${_dst} ${_src}"

            _killcount=$(($_killcount + 1))
        done

        # Nothing to do
        if [ $_killcount -eq 0 ] ; then
            return 0
        fi

        if ! echo "$_connections" | ctdb killtcp ; then
            echo "Failed to send killtcp control"
            return 0
        fi

        # Poll: the 4th unsuccessful check is reported as a timeout.
        for _count in 1 2 3 4 ; do
            _remaining=$(get_tcp_connections_for_ip $_ip | wc -l)

            if [ $_remaining -eq 0 ] ; then
                echo "Killed $_killcount TCP connections to released IP $_ip"
                return 0
            fi

            if [ $_count -gt 3 ] ; then
                echo "Timed out killing tcp connections for IP $_ip ($_remaining remaining)"
                return 0
            fi

            echo "Waiting for $_remaining connections to be killed for IP $_ip"
            sleep 1
        done
    }
}
| 491 |
|
|---|
| 492 | ##################################################################
|
|---|
| 493 | # kill off the local end for any TCP connections with the given IP
|
|---|
| 494 | ##################################################################
|
|---|
kill_tcp_connections_local_only ()
{
    # Same as kill_tcp_connections for IP $1, but always in "oneway"
    # mode.
    _mode="oneway"
    kill_tcp_connections "$1" "$_mode"
}
| 499 |
|
|---|
| 500 | ##################################################################
|
|---|
| 501 | # tickle any TCP connections with the given IP
|
|---|
| 502 | ##################################################################
|
|---|
tickle_tcp_connections ()
{
    # For every established TCP connection to local IP $1, run
    # "ctdb tickle" in both directions.  A single summary line is
    # printed if any tickle command failed.
    _ip="$1"

    get_tcp_connections_for_ip "$_ip" |
    {
        _failed=false

        while read dest src; do
            echo "Tickle TCP connection $src $dest"
            if ! ctdb tickle $src $dest >/dev/null 2>&1 ; then
                _failed=true
            fi

            echo "Tickle TCP connection $dest $src"
            if ! ctdb tickle $dest $src >/dev/null 2>&1 ; then
                _failed=true
            fi
        done

        ! $_failed || echo "Failed to send tickle control"
    }
}
| 523 |
|
|---|
get_tcp_connections_for_ip ()
{
    # Print "<local addr:port> <remote addr:port>" for each
    # ESTABLISHED TCP connection in "netstat -tn" output whose local
    # address is $1, either plain or with a "::ffff:" prefix.
    _ip="$1"

    netstat -tn | awk -v ip=$_ip '
        index($1, "tcp") == 1 &&
        (index($4, ip ":") == 1 || index($4, "::ffff:" ip ":") == 1) &&
        $6 == "ESTABLISHED" {
            print $4 " " $5
        }'
}
| 534 |
|
|---|
| 535 | ########################################################
|
|---|
| 536 |
|
|---|
add_ip_to_iface ()
{
    # Add address $2 with prefix length $3 to interface $1, bringing
    # the interface up first.
    #
    # IPv4 addresses are added with "brd +".  For IPv6 addresses, wait
    # for the address to stop being tentative, then verify that it is
    # actually present; a tentative/dadfailed address is removed again.
    #
    # Returns 1 (after printing a message) if the address could not be
    # added.  Calls die() if the interface cannot be brought up.
    _iface=$1
    _ip=$2
    _maskbits=$3

    # Ensure interface is up
    ip link set "$_iface" up || \
        die "Failed to bringup interface $_iface"

    # Only need to define broadcast for IPv4.  The test must use $_ip,
    # the address being added; an empty $_bcast is also what enables
    # the IPv6 handling below.
    case "$_ip" in
    *:*) _bcast=""      ;;
    *)   _bcast="brd +" ;;
    esac

    # $_bcast is deliberately unquoted: it is 0 or 2 words
    ip addr add "$_ip/$_maskbits" $_bcast dev "$_iface" || {
        echo "Failed to add $_ip/$_maskbits on dev $_iface"
        return 1
    }

    # Wait 5 seconds for IPv6 addresses to stop being tentative...
    if [ -z "$_bcast" ] ; then
        for _x in $(seq 1 10) ; do
            ip addr show to "${_ip}/128" | grep -q "tentative" || break
            sleep 0.5
        done

        # If the address was a duplicate then it won't be on the
        # interface so flag an error.
        _t=$(ip addr show to "${_ip}/128")
        case "$_t" in
        "")
            echo "Failed to add $_ip/$_maskbits on dev $_iface"
            return 1
            ;;
        *tentative*|*dadfailed*)
            echo "Failed to add $_ip/$_maskbits on dev $_iface"
            ip addr del "$_ip/$_maskbits" dev "$_iface"
            return 1
            ;;
        esac
    fi
}
| 581 |
|
|---|
delete_ip_from_iface()
{
    # Remove address $2 with prefix length $3 from interface $1.
    # Prints a message and returns 1 if the removal fails.
    _iface=$1
    _ip=$2
    _maskbits=$3

    # This could be set globally for all interfaces but it is probably
    # better to avoid surprises, so limit it to the interfaces where
    # CTDB has public IP addresses.  There isn't anywhere else
    # convenient to do this so just set it each time.  This is much
    # cheaper than remembering and re-adding secondaries.
    set_proc "sys/net/ipv4/conf/${_iface}/promote_secondaries" 1

    if ! ip addr del "$_ip/$_maskbits" dev "$_iface" ; then
        echo "Failed to del $_ip on dev $_iface"
        return 1
    fi
}
| 600 |
|
|---|
| 601 | # If the given IP is hosted then print 2 items: maskbits and iface
|
|---|
ip_maskbits_iface ()
{
    # Look up address $1 with "ip addr show to" and, for each line
    # whose first field matches "inet", print the prefix length, the
    # interface name and the address family.  The interface name comes
    # from the first output line, with any trailing ':' and any
    # "@..." suffix removed.  Prints nothing if ip reports nothing.
    _addr="$1"

    _family="inet"
    _bits=32
    case "$_addr" in
    *:*)
        _family="inet6"
        _bits=128
        ;;
    esac

    ip addr show to "${_addr}/${_bits}" 2>/dev/null |
        awk -v family="${_family}" '
            NR == 1 {
                iface = $2
                sub(":$", "", iface)
                sub("@.*", "", iface)
            }
            $1 ~ /inet/ {
                mask = $2
                sub(".*/", "", mask)
                print mask, iface, family
            }'
}
| 618 |
|
|---|
drop_ip ()
{
    # Remove address $1 (any "/maskbits" suffix is ignored) from the
    # interface ip_maskbits_iface reports for it.  Does nothing if
    # nothing is reported.  Output of the removal itself is discarded.
    _addr="${1%/*}"  # Remove optional maskbits

    # Replace the positional parameters with: maskbits iface family
    set -- $(ip_maskbits_iface $_addr)
    [ -n "$1" ] || return 0

    _maskbits="$1"
    _iface="$2"
    echo "Removing public address $_addr/$_maskbits from device $_iface"
    delete_ip_from_iface $_iface $_addr $_maskbits >/dev/null 2>&1
}
| 631 |
|
|---|
drop_all_public_ips ()
{
    # Call drop_ip on the first field of every line of the file named
    # by $CTDB_PUBLIC_ADDRESSES.  When that variable is unset or empty,
    # /dev/null is read, so nothing is dropped.
    _addresses_file="${CTDB_PUBLIC_ADDRESSES:-/dev/null}"

    while read _ip _x ; do
        drop_ip "$_ip"
    done <"$_addresses_file"
}
| 638 |
|
|---|
flush_route_cache ()
{
    # Write 1 to the IPv4 and then the IPv6 route flush entries under
    # /proc/sys/net, for whichever of them is writable.
    for _af in ipv4 ipv6 ; do
        set_proc_maybe "sys/net/${_af}/route/flush" 1
    done
}
| 644 |
|
|---|
| 645 | ########################################################
|
|---|
| 646 | # Interface monitoring
|
|---|
| 647 |
|
|---|
| 648 | # If the interface is a virtual one (e.g. VLAN) then get the
|
|---|
| 649 | # underlying interface
|
|---|
interface_get_real ()
{
    # Given the output of "ip link show <iface>" in $1, print the name
    # of the underlying interface.

    # Output of "ip link show <iface>"
    _iface_info="$1"

    # Extract the full interface description (2nd field of the first
    # line, without the trailing ':') to see if it is a VLAN
    _t=$(echo "$_iface_info" |
         awk 'NR == 1 { iface = $2; sub(":$", "", iface); print iface }')

    # Stripping everything up to the last '@' changes the string if
    # and only if it contains an '@'.
    if [ "${_t##*@}" != "$_t" ] ; then
        # VLAN: use the underlying interface, after the '@'
        echo "${_t##*@}"
    else
        # Not a regular VLAN.  For backward compatibility, assume
        # there is some other sort of VLAN that doesn't have the
        # '@' in the output and only use what is before a '.'.  If
        # there is no '.' then this will be the whole interface
        # name.
        echo "${_t%%.*}"
    fi
}
| 673 |
|
|---|
| 674 | # Check whether an interface is operational
|
|---|
| 675 | interface_monitor ()
|
|---|
| 676 | {
|
|---|
| 677 | _iface="$1"
|
|---|
| 678 |
|
|---|
| 679 | _iface_info=$(ip link show "$_iface" 2>&1) || {
|
|---|
| 680 | echo "ERROR: Monitored interface ${_iface} does not exist"
|
|---|
| 681 | return 1
|
|---|
| 682 | }
|
|---|
| 683 |
|
|---|
| 684 |
|
|---|
| 685 | # If the interface is a virtual one (e.g. VLAN) then get the
|
|---|
| 686 | # underlying interface.
|
|---|
| 687 | _realiface=$(interface_get_real "$_iface_info")
|
|---|
| 688 |
|
|---|
| 689 | if _bi=$(get_proc "net/bonding/${_realiface}" 2>/dev/null) ; then
|
|---|
| 690 | # This is a bond: various monitoring strategies
|
|---|
| 691 | echo "$_bi" | grep -q 'Currently Active Slave: None' && {
|
|---|
| 692 | echo "ERROR: No active slaves for bond device ${_realiface}"
|
|---|
| 693 | return 1
|
|---|
| 694 | }
|
|---|
| 695 | echo "$_bi" | grep -q '^MII Status: up' || {
|
|---|
| 696 | echo "ERROR: public network interface ${_realiface} is down"
|
|---|
| 697 | return 1
|
|---|
| 698 | }
|
|---|
| 699 | echo "$_bi" | grep -q '^Bonding Mode: IEEE 802.3ad Dynamic link aggregation' && {
|
|---|
| 700 | # This works around a bug in the driver where the
|
|---|
| 701 | # overall bond status can be up but none of the actual
|
|---|
| 702 | # physical interfaces have a link.
|
|---|
| 703 | echo "$_bi" | grep 'MII Status:' | tail -n +2 | grep -q '^MII Status: up' || {
|
|---|
| 704 | echo "ERROR: No active slaves for 802.ad bond device ${_realiface}"
|
|---|
| 705 | return 1
|
|---|
| 706 | }
|
|---|
| 707 | }
|
|---|
| 708 |
|
|---|
| 709 | return 0
|
|---|
| 710 | else
|
|---|
| 711 | # Not a bond
|
|---|
| 712 | case "$_iface" in
|
|---|
| 713 | lo*)
|
|---|
| 714 | # loopback is always working
|
|---|
| 715 | return 0
|
|---|
| 716 | ;;
|
|---|
| 717 | ib*)
|
|---|
| 718 | # we don't know how to test ib links
|
|---|
| 719 | return 0
|
|---|
| 720 | ;;
|
|---|
| 721 | *)
|
|---|
| 722 | ethtool "$_iface" | grep -q 'Link detected: yes' || {
|
|---|
| 723 | # On some systems, this is not successful when a
|
|---|
| 724 | # cable is plugged but the interface has not been
|
|---|
| 725 | # brought up previously. Bring the interface up
|
|---|
| 726 | # and try again...
|
|---|
| 727 | ip link set "$_iface" up
|
|---|
| 728 | ethtool "$_iface" | grep -q 'Link detected: yes' || {
|
|---|
| 729 | echo "ERROR: No link on the public network interface ${_iface}"
|
|---|
| 730 | return 1
|
|---|
| 731 | }
|
|---|
| 732 | }
|
|---|
| 733 | return 0
|
|---|
| 734 | ;;
|
|---|
| 735 | esac
|
|---|
| 736 | fi
|
|---|
| 737 | }
|
|---|
| 738 |
|
|---|
| 739 | ########################################################
|
|---|
| 740 | # Simple counters
|
|---|
_ctdb_counter_common () {
    # Work out which counter to use and make sure its directory
    # exists.  Sets $_service_name to $1, falling back to
    # $service_name and then $script_name, and $_counter_file to the
    # matching file under ${CTDB_SCRIPT_VARDIR}/failcount.
    _service_name="$1"
    [ -n "$_service_name" ] || _service_name="${service_name:-${script_name}}"

    _counter_file="${CTDB_SCRIPT_VARDIR}/failcount/${_service_name}"
    mkdir -p "${_counter_file%/*}" # dirname
}
ctdb_counter_init () {
    # Reset the counter selected by $1 (see _ctdb_counter_common) by
    # truncating its file to zero length.
    _ctdb_counter_common "$1"

    : >"$_counter_file"
}
ctdb_counter_incr () {
    # Increment the counter selected by $1 (see _ctdb_counter_common).
    _ctdb_counter_common "$1"

    # unary counting!  The counter value is the size of the file, so
    # exactly one byte must be appended.  printf is used because
    # "echo -n" is not portable.
    printf 1 >> "$_counter_file"
}
ctdb_counter_get () {
    # Print the value of the counter selected by $1 (see
    # _ctdb_counter_common): the size of the counter file in bytes
    # (unary counting!), or 0 if stat fails on it.
    _ctdb_counter_common "$1"

    if ! stat -c "%s" "$_counter_file" 2>/dev/null ; then
        echo 0
    fi
}
ctdb_check_counter () {
    # Compare a failure counter against a limit.
    #
    # $1  "error" (default): on a hit, print an error and exit 1;
    #     anything else: on a hit, silently return 1
    # $2  an integer operator supported by test (default -ge), or "%"
    #     to hit whenever the count is a multiple of the limit
    # $3  the limit (default $service_fail_limit)
    # $4  optional counter name, as for _ctdb_counter_common
    #
    # Returns 0 when the limit is not hit.
    _msg="${1:-error}"
    _op="${2:--ge}"
    _limit="${3:-${service_fail_limit}}"

    # All three leading arguments are optional, so only shift them
    # away when they were actually passed.
    if [ $# -ge 3 ] ; then
        shift 3
    else
        set --
    fi

    # ctdb_counter_get runs in a command substitution, so any
    # variables it sets are lost.  Resolve the counter here as well so
    # that $_service_name is valid for the error message below.
    _ctdb_counter_common "$1"
    _size=$(ctdb_counter_get "$1")

    _hit=false
    if [ "$_op" != "%" ] ; then
        if [ $_size $_op $_limit ] ; then
            _hit=true
        fi
    else
        if [ $(($_size $_op $_limit)) -eq 0 ] ; then
            _hit=true
        fi
    fi

    if $_hit ; then
        if [ "$_msg" = "error" ] ; then
            echo "ERROR: $_size consecutive failures for $_service_name, marking node unhealthy"
            exit 1
        else
            return 1
        fi
    fi
}
| 789 |
|
|---|
| 790 | ########################################################
|
|---|
| 791 |
|
|---|
ctdb_setup_service_state_dir ()
{
    # Set $service_state_dir to the state directory for service $1
    # (default $service_name) under
    # ${CTDB_SCRIPT_VARDIR}/service_state and create it.  Prints an
    # error and exits 1 if it cannot be created.
    service_state_dir="${CTDB_SCRIPT_VARDIR}/service_state/${1:-${service_name}}"

    if ! mkdir -p "$service_state_dir" ; then
        echo "Error creating state dir \"$service_state_dir\""
        exit 1
    fi
}
| 800 |
|
|---|
| 801 | ########################################################
|
|---|
| 802 | # Managed status history, for auto-start/stop
|
|---|
| 803 |
|
|---|
_ctdb_managed_common ()
{
    # Set $_ctdb_managed_file to the managed-history marker file for
    # $service_name.
    _managed_dir="${CTDB_SCRIPT_VARDIR}/managed_history"
    _ctdb_managed_file="${_managed_dir}/${service_name}"
}
| 808 |
|
|---|
ctdb_service_managed ()
{
    # Create the managed-history marker file for $service_name, making
    # its parent directory first if necessary.
    _ctdb_managed_common

    _managed_parent="${_ctdb_managed_file%/*}" # dirname
    mkdir -p "$_managed_parent"
    touch "$_ctdb_managed_file"
}
| 815 |
|
|---|
ctdb_service_unmanaged ()
{
    # Remove the managed-history marker file for $service_name, if
    # there is one.
    _ctdb_managed_common ; rm -f "$_ctdb_managed_file"
}
| 821 |
|
|---|
is_ctdb_previously_managed_service ()
{
    # Succeed if the managed-history marker file for $service_name
    # exists as a regular file.
    _ctdb_managed_common ; [ -f "$_ctdb_managed_file" ]
}
| 827 |
|
|---|
| 828 | ##################################################################
|
|---|
| 829 | # Reconfigure a service on demand
|
|---|
| 830 |
|
|---|
_ctdb_service_reconfigure_common ()
{
    # Set $_d to the status directory for $service_name under
    # ${CTDB_SCRIPT_VARDIR}/service_status, create it, and set
    # $_ctdb_service_reconfigure_flag to the "reconfigure" flag file
    # inside it.
    _d="${CTDB_SCRIPT_VARDIR}/service_status/${service_name}"
    _ctdb_service_reconfigure_flag="$_d/reconfigure"

    mkdir -p "$_d"
}
| 837 |
|
|---|
ctdb_service_needs_reconfigure ()
{
    # Succeed if the reconfigure flag file for $service_name exists.
    _ctdb_service_reconfigure_common ; [ -e "$_ctdb_service_reconfigure_flag" ]
}
| 843 |
|
|---|
ctdb_service_set_reconfigure ()
{
    # Create (or truncate) the reconfigure flag file for
    # $service_name.
    _ctdb_service_reconfigure_common

    : >"$_ctdb_service_reconfigure_flag"
}
| 849 |
|
|---|
# Cancel any scheduled reconfigure of $service_name by removing the
# reconfigure flag file.
ctdb_service_unset_reconfigure ()
{
    # -f: no complaint if no reconfigure was scheduled.
    _ctdb_service_reconfigure_common ; rm -f "${_ctdb_service_reconfigure_flag}"
}
|
|---|
| 855 |
|
|---|
# Reconfigure $service_name now.
#
# The scheduled-reconfigure flag is cleared first, then the
# service-specific service_reconfigure() hook runs.  If the hook fails
# its status is returned immediately; otherwise ctdb_counter_init is
# called and its status is returned.
ctdb_service_reconfigure ()
{
    echo "Reconfiguring service \"${service_name}\"..."
    ctdb_service_unset_reconfigure

    if service_reconfigure ; then
        ctdb_counter_init
    else
        # $? here is still the status of service_reconfigure.
        return $?
    fi
}
|
|---|
| 863 |
|
|---|
| 864 | # Default service_reconfigure() function does nothing.
|
|---|
service_reconfigure ()
{
    # Placeholder hook: succeeds without doing anything.
    return 0
}
|
|---|
| 869 |
|
|---|
# Try to take the per-service reconfigure lock on behalf of the current
# event.
#
# The lock is the file "reconfigure_lock" in the service status
# directory.  Its first line names the event that took the lock and the
# following lines hold PIDs.  The lock counts as held if any listed
# PID, other than this script's own, belongs to a live process.
#
# Returns 0 after writing $event_name and this script's PID to the lock
# file; returns 1 if another live process holds the lock.
ctdb_reconfigure_take_lock ()
{
    _ctdb_service_reconfigure_common
    _lock="${_d}/reconfigure_lock"
    # $_d was created by _ctdb_service_reconfigure_common, so only the
    # file itself needs to exist before it is opened for reading.
    touch "$_lock"

    (
        # Hold an flock on stdin (the lock file) while it is inspected
        # and rewritten.  It is dropped when the subshell ends.
        flock 0
        # This is overkill but will work if we need to extend this to
        # allow certain events to run multiple times in parallel
        # (e.g. takeip) and write multiple PIDs to the file.
        read -r _locker_event
        if [ -n "$_locker_event" ] ; then
            while read -r _pid ; do
                # Separate tests: "-a" is obsolescent and can misparse
                # when an operand read from the file looks like an
                # operator.  $$ is the main script's PID even here.
                if [ -n "$_pid" ] && [ "$_pid" != $$ ] && \
                    kill -0 "$_pid" 2>/dev/null ; then
                    exit 1
                fi
            done
        fi

        printf "%s\n%s\n" "$event_name" $$ >"$_lock"
        exit 0
    ) <"$_lock"
}
|
|---|
| 896 |
|
|---|
# Release the per-service reconfigure lock by deleting the lock file.
# A lock file that is already missing is not an error.
ctdb_reconfigure_release_lock ()
{
    _ctdb_service_reconfigure_common

    _lock="${_d}/reconfigure_lock"
    rm -f "${_lock}"
}
|
|---|
| 904 |
|
|---|
# Re-emit the result of the last "monitor" run of this script and exit
# with it.  This function does not return.
#
# The previous result is taken from "ctdb scriptstatus -X", whose lines
# look like:
#   |monitor|60.nfs|1|ERROR|1314764004.030861|1314764004.035514|foo bar|
# Some versions omit the leading '|', so the parsing below accepts both.
ctdb_replay_monitor_status ()
{
    echo "Replaying previous status for this script due to reconfigure..."

    _out=$(ctdb scriptstatus -X | grep -E "^\|?monitor\|${script_name}\|")

    # Splitting on '|' and re-splitting on whitespace is the cheapest
    # way to reach the middle fields: $3 is the code, $4 the status.
    set -- $(IFS="|" ; echo $_out)
    _code="$3"
    _status="$4"

    # The error text is everything after the sixth field, taken from
    # the raw line (minus the trailing '|') so that its original
    # spacing and punctuation survive.
    _out="${_out%|}"
    _err_out="${_out#*monitor|${script_name}|*|*|*|*|}"

    # OK and ERROR are replayed as-is.  The other two statuses cannot
    # be reproduced via an exit code, so they are recast and annotated.
    case "$_status" in
    TIMEDOUT)
        _code=1
        _err_out="[Replay of TIMEDOUT scriptstatus - note incorrect return code.] ${_err_out}"
        ;;
    DISABLED)
        _code=0
        _err_out="[Replay of DISABLED scriptstatus - note incorrect return code.] ${_err_out}"
        ;;
    esac

    [ -z "$_err_out" ] || echo "$_err_out"

    exit $_code
}
|
|---|
| 942 |
|
|---|
# Coordinate reconfiguration of $service_name with the events that must
# not overlap it.
#
# Only "monitor", "ipreallocated" and "reconfigure" are of interest;
# for any other $event_name this returns 0 immediately.
#
# With the reconfigure lock taken:
#   reconfigure    - run the reconfigure, release the lock and exit with
#                    the reconfigure's status
#   ipreallocated  - run a reconfigure if one is scheduled, then return
#   monitor        - nothing extra, just return
#
# With the lock held by someone else:
#   reconfigure    - exit 2 so the caller retries
#   ipreallocated  - return 0, leaving any scheduled reconfigure pending
#   monitor        - replay the previous monitor status and exit
ctdb_service_check_reconfigure ()
{
    assert_service_name

    # We only care about some events in this function.  For others we
    # return now.
    case "$event_name" in
    monitor|ipreallocated|reconfigure) : ;;
    *) return 0 ;;
    esac

    if ctdb_reconfigure_take_lock ; then
        # No events covered by this function are running, so proceed
        # freely.
        case "$event_name" in
        reconfigure)
            # Subshell: an exit inside the reconfigure code must not
            # stop us from releasing the lock below.
            (ctdb_service_reconfigure)
            _ret=$?
            # Release explicitly instead of leaving a stale lock file
            # behind.  A stale file is only ignored while its recorded
            # PID is dead, which PID reuse can defeat.
            ctdb_reconfigure_release_lock
            exit $_ret
            ;;
        ipreallocated)
            if ctdb_service_needs_reconfigure ; then
                ctdb_service_reconfigure
            fi
            ;;
        esac

        ctdb_reconfigure_release_lock
    else
        # Somebody else is running an event we don't want to collide
        # with.  We proceed with caution.
        case "$event_name" in
        reconfigure)
            # Tell whoever called us to retry.
            exit 2
            ;;
        ipreallocated)
            # Defer any scheduled reconfigure and just run the rest of
            # the ipreallocated event, as per the eventscript.  There's
            # an assumption here that the event doesn't depend on any
            # scheduled reconfigure.  This is true in the current code.
            return 0
            ;;
        monitor)
            # There is most likely a reconfigure in progress so the
            # service is possibly unstable.  As above, we defer any
            # scheduled reconfigure.  We also replay the previous
            # monitor status since that's the best information we have.
            ctdb_replay_monitor_status
            ;;
        esac
    fi
}
|
|---|
| 997 |
|
|---|
| 998 | ##################################################################
|
|---|
| 999 | # Does CTDB manage this service? - and associated auto-start/stop
|
|---|
| 1000 |
|
|---|
# Backward-compatibility helper for the old per-service settings.
#
# $1 - value of a legacy setting such as $CTDB_MANAGES_SAMBA
# $2 - the service name that setting refers to
#
# If $1 is "yes" and $2 is the current $service_name, append $2 to
# $CTDB_MANAGED_SERVICES.  Otherwise do nothing.
ctdb_compat_managed_service ()
{
    # Two separate tests: a single "[ ... -a ... ]" with this many
    # operands has unspecified results in POSIX and can misparse when an
    # operand looks like an operator.
    if [ "$1" = "yes" ] && [ "$2" = "$service_name" ] ; then
        CTDB_MANAGED_SERVICES="$CTDB_MANAGED_SERVICES $2"
    fi
}
|
|---|
| 1007 |
|
|---|
# Succeed if $service_name is listed in $CTDB_MANAGED_SERVICES.
#
# If it is not listed, the legacy CTDB_MANAGES_* settings are folded
# into $CTDB_MANAGED_SERVICES and the check is repeated.  Note that
# this second step can modify $CTDB_MANAGED_SERVICES.
is_ctdb_managed_service ()
{
    assert_service_name

    # Pad with spaces so the name only matches as a whole word.
    t=" $CTDB_MANAGED_SERVICES "
    case "$t" in
    *" "${service_name}" "*) return 0 ;;
    esac

    # Not listed: apply the backward-compatible settings and look again.
    ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD"  "vsftpd"
    ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA"   "samba"
    ctdb_compat_managed_service "$CTDB_MANAGES_WINBIND" "winbind"
    ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"   "apache2"
    ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"   "httpd"
    ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI"   "iscsi"
    ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD"   "clamd"
    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"     "nfs"

    t=" $CTDB_MANAGED_SERVICES "
    case "$t" in
    *" "${service_name}" "*) return 0 ;;
    *) return 1 ;;
    esac
}
|
|---|
| 1037 |
|
|---|
# Start or stop $service_name when appropriate for the current event.
#
# service-start / service-stop pseudo-events: start or stop the service
# and exit with the result.  They are refused via die() when the service
# is managed or when $CTDB_SERVICE_AUTOSTARTSTOP is "yes".
#
# monitor event with $CTDB_SERVICE_AUTOSTARTSTOP = "yes": compare the
# current managed state with the recorded one.  On a change, start or
# stop the service through background_with_logging and exit with that
# command's status.
#
# In every other situation this returns 0 without doing anything.
ctdb_start_stop_service ()
{
    assert_service_name

    case "$event_name" in
    service-start)
        is_ctdb_managed_service && \
            die 'service-start event not permitted when service is managed'
        [ "$CTDB_SERVICE_AUTOSTARTSTOP" = "yes" ] && \
            die 'service-start event not permitted with $CTDB_SERVICE_AUTOSTARTSTOP = yes'
        ctdb_service_start
        exit $?
        ;;
    service-stop)
        is_ctdb_managed_service && \
            die 'service-stop event not permitted when service is managed'
        [ "$CTDB_SERVICE_AUTOSTARTSTOP" = "yes" ] && \
            die 'service-stop event not permitted with $CTDB_SERVICE_AUTOSTARTSTOP = yes'
        ctdb_service_stop
        exit $?
        ;;
    esac

    # Automatic start/stop only happens when enabled, and only from the
    # monitor event.
    if [ "$CTDB_SERVICE_AUTOSTARTSTOP" != "yes" ] || \
        [ "$event_name" != "monitor" ] ; then
        return 0
    fi

    # Capture "managed now" and "managed before", then act only when
    # the two differ.
    if is_ctdb_managed_service ; then
        _is_managed=yes
    else
        _is_managed=no
    fi
    if is_ctdb_previously_managed_service ; then
        _was_managed=yes
    else
        _was_managed=no
    fi

    case "${_is_managed}/${_was_managed}" in
    yes/no)
        echo "Starting service \"$service_name\" - now managed"
        background_with_logging ctdb_service_start
        exit $?
        ;;
    no/yes)
        echo "Stopping service \"$service_name\" - no longer managed"
        background_with_logging ctdb_service_stop
        exit $?
        ;;
    esac
}
|
|---|
| 1087 |
|
|---|
# Start $service_name via the service_start() hook.
#
# The service is marked as managed before the start is attempted, so it
# stays marked even if the start fails.  A failing service_start()
# status is returned as-is; on success ctdb_counter_init and
# ctdb_check_tcp_init are called.
ctdb_service_start ()
{
    ctdb_service_managed

    if service_start ; then
        ctdb_counter_init
        ctdb_check_tcp_init
    else
        # $? here is still the status of service_start.
        return $?
    fi
}
|
|---|
| 1098 |
|
|---|
# Stop $service_name: drop its "managed" marker, then run the
# service_stop() hook.  The return status is that of service_stop().
ctdb_service_stop ()
{
    ctdb_service_unmanaged ; service_stop
}
|
|---|
| 1104 |
|
|---|
| 1105 | # Default service_start() and service_stop() functions.
|
|---|
| 1106 |
|
|---|
| 1107 | # These may be overridden in an eventscript.
|
|---|
service_start ()
{
    # Delegate to the service command, using the current service name.
    service "${service_name}" start
}
|
|---|
| 1112 |
|
|---|
service_stop ()
{
    # Delegate to the service command, using the current service name.
    service "${service_name}" stop
}
|
|---|
| 1117 |
|
|---|
| 1118 | ##################################################################
|
|---|
| 1119 |
|
|---|
# Standard event handler: takes no action and always succeeds.
ctdb_standard_event_handler ()
{
    return 0
}
|
|---|
| 1124 |
|
|---|
# Run iptables or ip6tables under a lock.
#
# $1   - address family; "inet6" selects ip6tables, anything else
#        selects iptables
# $2.. - arguments passed through to the selected command
#
# The return status is that of flock.
iptables_wrapper ()
{
    _family="$1"
    shift

    case "$_family" in
    inet6) _iptables_cmd="ip6tables" ;;
    *)     _iptables_cmd="iptables" ;;
    esac

    # iptables doesn't like being re-entered, so serialise callers on a
    # lock file, waiting up to 30 seconds for it.
    flock -w 30 "${CTDB_SCRIPT_VARDIR}/iptables.flock" "$_iptables_cmd" "$@"
}
|
|---|
| 1137 |
|
|---|
| 1138 | # AIX (and perhaps others?) doesn't have mktemp
|
|---|
if ! type mktemp >/dev/null 2>&1 ; then
    # Minimal stand-in for mktemp(1), defined only when the system has
    # none.
    #
    # Usage: mktemp [-d]
    #
    # Creates an empty file (or, with -d, a directory) with a random
    # name under ${TMPDIR:-/tmp} using umask 077, and prints its path.
    # Returns non-zero and prints nothing if no random name could be
    # generated or the file/directory could not be created.  Any other
    # arguments are ignored.
    mktemp ()
    {
        _dir=false
        if [ "$1" = "-d" ] ; then
            _dir=true
            shift
        fi
        _d="${TMPDIR:-/tmp}"
        # First 10 hex digits of an MD5 over some random data.
        _hex10=$(dd if=/dev/urandom count=20 2>/dev/null | \
            md5sum | \
            sed -e 's@\(..........\).*@\1@')
        # Without a random suffix the name would be predictable.
        [ -n "$_hex10" ] || return 1
        _t="${_d}/tmp.${_hex10}"
        (
            umask 077
            if $_dir ; then
                mkdir "$_t"
            else
                # noclobber: fail rather than reuse an existing file.
                set -C
                >"$_t"
            fi
        ) || return 1
        echo "$_t"
    }
fi
|
|---|
| 1163 |
|
|---|
| 1164 | ########################################################
|
|---|
| 1165 | # tickle handling
|
|---|
| 1166 | ########################################################
|
|---|
| 1167 |
|
|---|
# Bring CTDB's tickle list for one port into line with the TCP
# connections currently established to this node's public IPs.
#
# $1 - the local TCP port to examine
#
# Connections without a tickle get one added; tickles without a
# matching connection are deleted.  Working files live under
# ${CTDB_SCRIPT_VARDIR}/tickles.
update_tickles ()
{
    _port="$1"

    tickledir="${CTDB_SCRIPT_VARDIR}/tickles"
    mkdir -p "$tickledir"

    ctdb_get_pnn

    # Public IPs currently hosted by this node.
    _ips=$(ctdb -X ip | awk -F'|' -v pnn=$pnn '$3 == pnn {print $2}')

    # The same IPs as a regexp alternation with the dots escaped.  The
    # backslashes are doubled because awk -v processes escapes again.
    _ipschoice="($(echo $_ips | sed -e 's/ /|/g' -e 's/\./\\\\./g'))"
    _destpat="^${_ipschoice}:${_port}\$"

    # Both working files carry $$ in their names.  They are in CTDB's
    # private state directory, and the PID avoids a very rare race
    # involving CTDB's script debugging.
    _conn_list="${tickledir}/${_port}.connections.$$"
    _tickle_list="${tickledir}/${_port}.tickles.$$"

    # Established connections to our public IPs, as "remote local".
    netstat -tn |
        awk -v destpat="$_destpat" \
            '$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ destpat {print $5, $4}' |
        sort >"$_conn_list"

    # Tickles CTDB currently knows about, in the same format.
    for _ip in $_ips ; do
        ctdb -X gettickles $_ip $_port |
            awk -F'|' 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
    done |
        sort >"$_tickle_list"

    # Lines only in the connection list: add the missing tickles.
    comm -23 "$_conn_list" "$_tickle_list" |
        while read _src _dst ; do
            ctdb addtickle $_src $_dst
        done

    # Lines only in the tickle list: the connection has gone away.
    comm -13 "$_conn_list" "$_tickle_list" |
        while read _src _dst ; do
            ctdb deltickle $_src $_dst
        done

    rm -f "$_conn_list" "$_tickle_list"

    # Clear out working files left behind by scripts that were killed.
    find "$tickledir" -type f -mmin +10 | xargs -r rm
}
|
|---|
| 1218 |
|
|---|
| 1219 | ########################################################
|
|---|
| 1220 | # load a site local config file
|
|---|
| 1221 | ########################################################
|
|---|
| 1222 |
|
|---|
# Source optional site-local configuration, in this order:
#   1. the file named by $CTDB_RC_LOCAL, if set and executable
#   2. $CTDB_BASE/rc.local, if executable
#   3. each executable file in $CTDB_BASE/rc.local.d/
#
# All paths are quoted so that a $CTDB_BASE containing whitespace or
# glob characters is handled correctly.
if [ -n "$CTDB_RC_LOCAL" ] && [ -x "$CTDB_RC_LOCAL" ] ; then
    . "$CTDB_RC_LOCAL"
fi

if [ -x "$CTDB_BASE/rc.local" ] ; then
    . "$CTDB_BASE/rc.local"
fi

if [ -d "$CTDB_BASE/rc.local.d" ] ; then
    # Only the trailing * is left unquoted, so it alone is expanded.
    for i in "$CTDB_BASE"/rc.local.d/* ; do
        [ -x "$i" ] && . "$i"
    done
fi
|
|---|
| 1236 |
|
|---|
| 1237 | script_name="${0##*/}" # basename
|
|---|
| 1238 | service_fail_limit=1
|
|---|
| 1239 | event_name="$1"
|
|---|