| 1 | #!/bin/bash
|
|---|
| 2 |
|
|---|
| 3 | # Run commands on CTDB nodes.
|
|---|
| 4 |
|
|---|
| 5 | # See http://ctdb.samba.org/ for more information about CTDB.
|
|---|
| 6 |
|
|---|
| 7 | # Copyright (C) Martin Schwenke 2008
|
|---|
| 8 |
|
|---|
| 9 | # Based on an earlier script by Andrew Tridgell and Ronnie Sahlberg.
|
|---|
| 10 |
|
|---|
| 11 | # Copyright (C) Andrew Tridgell 2007
|
|---|
| 12 |
|
|---|
| 13 | # This program is free software; you can redistribute it and/or modify
|
|---|
| 14 | # it under the terms of the GNU General Public License as published by
|
|---|
| 15 | # the Free Software Foundation; either version 3 of the License, or
|
|---|
| 16 | # (at your option) any later version.
|
|---|
| 17 |
|
|---|
| 18 | # This program is distributed in the hope that it will be useful,
|
|---|
| 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|---|
| 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|---|
| 21 | # GNU General Public License for more details.
|
|---|
| 22 |
|
|---|
| 23 | # You should have received a copy of the GNU General Public License
|
|---|
| 24 | # along with this program; if not, see <http://www.gnu.org/licenses/>.
|
|---|
| 25 |
|
|---|
# Name of this script, used as a prefix in diagnostics.  $0 is quoted so
# that an installation path containing whitespace is handled correctly.
prog=$(basename -- "$0")
|
|---|
| 27 |
|
|---|
# Print the usage message on standard error and exit with status 1.
# Never returns to the caller.
usage ()
{
    cat >&2 <<EOF
Usage: onnode [OPTION] ... <NODES> <COMMAND> ...
  options:
    -c          Run in current working directory on specified nodes.
    -f          Specify nodes file, overrides CTDB_NODES_FILE.
    -i          Keep standard input open - the default is to close it.
    -n          Allow nodes to be specified by name.
    -o <prefix> Save standard output from each node to file <prefix>.<ip>
    -p          Run command in parallel on specified nodes.
    -P          Push given files to nodes instead of running commands.
    -q          Do not print node addresses (overrides -v).
    -v          Print node address even for a single node.
  <NODES>       "all", "any", "ok" (or "healthy"), "con" (or "connected"),
                "rm" (or "recmaster"), "lvs" (or "lvsmaster"),
                "natgw" (or "natgwlist"); or
                a node number (0 base); or
                a hostname (if -n is specified); or
                list (comma separated) of <NODES>; or
                range (hyphen separated) of node numbers.
EOF
    exit 1
}
|
|---|
| 53 |
|
|---|
# Report a malformed node specification on standard error, followed by
# a blank line, then hand over to usage to print the help text.
invalid_nodespec ()
{
    printf '%s\n\n' "Invalid <nodespec>" >&2
    usage
}
|
|---|
| 59 |
|
|---|
# Default values for the command-line switches; parse_options overrides
# them according to the options given.
current=false     # -c
names_ok=false    # -n
parallel=false    # -p
push=false        # -P
quiet=false       # -q
stdin=false       # -i
verbose=false     # -v
prefix=""         # -o <prefix>

# Use the built-in configuration directory unless the environment
# already provides a non-empty CTDB_BASE.
: "${CTDB_BASE:=/usr/local/etc/ctdb}"
|
|---|
| 73 |
|
|---|
# Read the shell helper file from the configuration directory, then load
# the "ctdb" configuration.  loadconfig is not defined in this script;
# presumably the sourced file provides it - confirm against that file.
source "${CTDB_BASE}/functions"
loadconfig ctdb
|
|---|
| 76 |
|
|---|
# Parse onnode's command line.
#
# Arguments: the script's own arguments ("$@").
# Globals written: current, CTDB_NODES_FILE, names_ok, prefix, parallel,
#   quiet, verbose, push, stdin, nodespec, command.
# Calls usage (which exits with status 1) on an invalid option, on
# -h/--help, or when fewer than two non-option arguments remain.
parse_options ()
{
    # $POSIXLY_CORRECT means that the command passed to onnode can
    # take options and getopt won't reorder things to make them
    # options to onnode.
    local temp
    # Not on the previous line - local returns 0 and would hide a
    # getopt failure.
    temp=$(POSIXLY_CORRECT=1 getopt -n "$prog" -o "cf:hno:pqvPi" -l help -- "$@") || usage

    eval set -- "$temp"

    while true ; do
        case "$1" in
            -c) current=true ; shift ;;
            -f) CTDB_NODES_FILE="$2" ; shift 2 ;;
            -n) names_ok=true ; shift ;;
            -o) prefix="$2" ; shift 2 ;;
            -p) parallel=true ; shift ;;
            -q) quiet=true ; shift ;;
            -v) verbose=true ; shift ;;
            -P) push=true ; shift ;;
            -i) stdin=true ; shift ;;
            --) shift ; break ;;
            -h|--help|*) usage ;; # Shouldn't happen, so this is reasonable.
        esac
    done

    # Both a node specification and a command (or file list) are required.
    [ $# -ge 2 ] || usage

    nodespec="$1" ; shift
    # Everything left is the command, joined into one string.  "$*" is
    # the explicit form of that join (space-separated with default IFS).
    command="$*"
}
|
|---|
| 111 |
|
|---|
# Print the n-th (0-based) entry of a node list.
#
# Arguments: $1 - index n; remaining arguments - the node list.
# Output:    the selected node on standard output.
# Exits with status 1 and a message on standard error when n is not a
# non-negative integer, lies beyond the end of the list, or selects an
# empty or "#DEAD" entry.
echo_nth ()
{
    local n="$1" ; shift

    local node=""
    case "$n" in
        ''|*[!0-9]*)
            # Not a plain non-negative integer: nothing can be selected.
            ;;
        *)
            # Only shift when the index is in range.  An out-of-range
            # shift fails and leaves the arguments untouched, which
            # would silently select the first node instead.
            if [ "$n" -lt $# ] 2>/dev/null ; then
                shift "$n"
                node="$1"
            fi
            ;;
    esac

    if [ -n "$node" ] && [ "$node" != "#DEAD" ] ; then
        printf '%s\n' "$node"
    else
        echo "${prog}: \"node ${n}\" does not exist" >&2
        exit 1
    fi
}
|
|---|
| 126 |
|
|---|
# Expand a comma-separated node specification into one item per line.
#
# Arguments: $1 - the node specification.
# Output:    each item on its own line; numeric ranges "a-b" are
#            expanded with seq, keywords and other items are passed
#            through unchanged.
# Globals read: names_ok.
# Calls invalid_nodespec for an item that is neither a keyword, a
# non-negative number, nor (when names are allowed) a name.
parse_nodespec ()
{
    # Subshell avoids hacks to restore $IFS.
    (
        IFS=","
        local item
        for item in $1 ; do
            case "$item" in
                *-*)
                    # A numeric range.  If seq rejects it, the item may
                    # still be a hostname containing a hyphen, which is
                    # acceptable when names are allowed.
                    seq "${item%-*}" "${item#*-}" 2>/dev/null ||
                        { $names_ok && printf '%s\n' "$item" ; } ||
                        invalid_nodespec
                    ;;
                # Separate lines for readability.
                all|any|ok|healthy|con|connected) printf '%s\n' "$item" ;;
                rm|recmaster|lvs|lvsmaster|natgw|natgwlist) printf '%s\n' "$item" ;;
                *)
                    # Accept a non-negative number, or anything at all
                    # when names are allowed.
                    [ "$item" -gt -1 ] 2>/dev/null || $names_ok || invalid_nodespec
                    printf '%s\n' "$item"
                    ;;
            esac
        done
    )
}
|
|---|
| 145 |
|
|---|
ctdb_status_output="" # cache
# Print the address of every node whose "ctdb -X status" line matches
# the requested status.
#
# Arguments: $1 - whitespace-separated list of all nodes, indexed by
#                 node number;
#            $2 - "healthy" or "connected".
# Globals:   ctdb_status_output (read/written) caches the status output
#            with its first line removed.
# Exits with status 1 if the status cannot be obtained; any other status
# keyword is passed to invalid_nodespec.
get_nodes_with_status ()
{
    local all_nodes="$1"
    local status="$2"

    if [ -z "$ctdb_status_output" ] ; then
        if ! ctdb_status_output=$(ctdb -X status 2>&1) ; then
            echo "${prog}: unable to get status of CTDB nodes" >&2
            echo "$ctdb_status_output" >&2
            exit 1
        fi
        local nl="
"
        # Drop everything up to and including the first newline.
        ctdb_status_output="${ctdb_status_output#*${nl}}"
    fi

    (
        local line
        # Add "|" to the field separators used for word splitting below.
        IFS="${IFS}|"
        while IFS="" read -r line ; do

            # Deliberately unquoted: split the line into fields on "|".
            set -- $line
            shift # line starts with | so 1st field is empty
            local pnn="$1" ; shift
            shift # skip the address field; the node list supplies it

            case "$status" in
                healthy)
                    # If any bit is 1, don't match this address.
                    local s
                    for s ; do
                        [ "$s" != "1" ] || continue 2
                    done
                    ;;
                connected)
                    # If disconnected bit is not 0, don't match this address.
                    [ "$1" = "0" ] || continue
                    ;;
                *)
                    invalid_nodespec
            esac

            echo_nth "$pnn" $all_nodes
        done <<<"$ctdb_status_output"
    )
}
|
|---|
| 194 |
|
|---|
ctdb_props="" # cache
# Print the address of the node that holds a given cluster property.
#
# Arguments: $1 - whitespace-separated list of all nodes, indexed by
#                 node number;
#            $2 - property name, used as the ctdb subcommand
#                 ("recmaster", "lvsmaster" or "natgwlist").
# Globals:   ctdb_props (read/written) caches results as space-separated
#            ":<prop>:<node>" entries.
# Exits with status 1 and a message if no node has the property.
get_node_with_property ()
{
    local all_nodes="$1"
    local prop="$2"

    local prop_node=""
    # Strip everything up to and including ":<prop>:".  The leading "*"
    # lets the entry be found anywhere in the cache, not only when it
    # happens to be the first one.
    local cached="${ctdb_props##*:"${prop}":}"
    if [ "$cached" = "$ctdb_props" ] ; then
        # Not in cache.
        if prop_node=$(ctdb "$prop" -X 2>/dev/null) ; then
            if [ "$prop" = "natgwlist" ] ; then
                prop_node="${prop_node%% *}" # 1st word
                if [ "$prop_node" = "-1" ] ; then
                    # This works around natgwlist returning 0 even
                    # when there's no natgw.
                    prop_node=""
                fi
            else
                # We only want the first line.
                local nl="
"
                prop_node="${prop_node%%${nl}*}"
            fi
        else
            prop_node=""
        fi

        if [ -n "$prop_node" ] ; then
            # Add to cache.
            ctdb_props="${ctdb_props}${ctdb_props:+ }:${prop}:${prop_node}"
        fi
    else
        # Get from cache: the value runs up to the next space.
        prop_node="${cached%% *}"
    fi

    if [ -n "$prop_node" ] ; then
        echo_nth "$prop_node" $all_nodes
    else
        echo "${prog}: No ${prop} available" >&2
        exit 1
    fi
}
|
|---|
| 240 |
|
|---|
# Print the address of one node that answers "ctdb pnn".
#
# Arguments: $1 - whitespace-separated list of all nodes, indexed by
#                 node number.
# Returns:   0 after printing the first responding node, 1 if no line
#            of the output starts with "PNN:".
get_any_available_node ()
{
    local all_nodes="$1"

    # We do a recursive onnode to find which nodes are up and running.
    # Its exit status is deliberately ignored: failures show up as
    # non-matching lines in the captured output.  $0 is quoted so a
    # script path containing whitespace still works.
    local out
    out=$("$0" -pq all ctdb pnn 2>&1)
    local line
    while read -r line ; do
        local pnn="${line#PNN:}"
        # The prefix was present only if stripping it changed the line.
        if [ "$pnn" != "$line" ] ; then
            echo_nth "$pnn" $all_nodes
            return 0
        fi
        # Else must be an error message from a down node.
    done <<<"$out"
    return 1
}
|
|---|
| 258 |
|
|---|
# Resolve a node specification into node addresses, one per line.
#
# Arguments: $1 - the node specification given on the command line.
# Globals read: CTDB_NODES_SOCKETS, CTDB_NODES_FILE, CTDB_NODES,
#   CTDB_BASE, names_ok, prog.
# Exits with status 1 if the nodes file is unreadable, if the
# specification cannot be parsed, or if a requested node or property is
# unavailable.
get_nodes ()
{
    local all_nodes

    if [ -n "$CTDB_NODES_SOCKETS" ] ; then
        all_nodes="$CTDB_NODES_SOCKETS"
    else
        local f="${CTDB_BASE}/nodes"
        if [ -n "$CTDB_NODES_FILE" ] ; then
            f="$CTDB_NODES_FILE"
            if [ ! -e "$f" ] && [ "${f#/}" = "$f" ] ; then
                # $f is relative, try in $CTDB_BASE
                f="${CTDB_BASE}/${f}"
            fi
        elif [ -n "$CTDB_NODES" ] ; then
            f="$CTDB_NODES"
        fi

        if [ ! -r "$f" ] ; then
            echo "${prog}: unable to open nodes file \"${f}\"" >&2
            exit 1
        fi

        # Strip comments and spaces; a line left empty becomes "#DEAD"
        # so that later lines keep their position in the list.
        all_nodes=$(sed -e 's@#.*@@g' -e 's@ *@@g' -e 's@^$@#DEAD@' "$f")
    fi

    # Parse before looping so that a failure is caught even when the
    # parser produced no output at all.
    local specs
    specs=$(parse_nodespec "$1") || exit 1

    local n
    for n in $specs ; do
        case "$n" in
            all)
                echo "${all_nodes//#DEAD/}"
                ;;
            any)
                get_any_available_node "$all_nodes" || exit 1
                ;;
            ok|healthy)
                get_nodes_with_status "$all_nodes" "healthy" || exit 1
                ;;
            con|connected)
                get_nodes_with_status "$all_nodes" "connected" || exit 1
                ;;
            rm|recmaster)
                get_node_with_property "$all_nodes" "recmaster" || exit 1
                ;;
            lvs|lvsmaster)
                get_node_with_property "$all_nodes" "lvsmaster" || exit 1
                ;;
            natgw|natgwlist)
                get_node_with_property "$all_nodes" "natgwlist" || exit 1
                ;;
            [0-9]|[0-9][0-9]|[0-9][0-9][0-9])
                echo_nth "$n" $all_nodes
                ;;
            *)
                $names_ok || invalid_nodespec
                printf '%s\n' "$n"
                ;;
        esac
    done
}
|
|---|
| 320 |
|
|---|
# Copy files to the same path on a node using rsync.  Used in place of
# the remote shell when -P is given.
#
# Arguments: $1 - destination host;
#            $2 - whitespace-separated list of files; relative names are
#                 taken relative to $PWD on both sides.
# Globals read: verbose.
# Returns:   0 if every rsync succeeded, otherwise the status of the
#            last rsync that failed.
push ()
{
    local host="$1"
    local files="$2"

    local rc=0
    local f
    # $files is deliberately unquoted so that it splits into file names.
    for f in $files ; do
        $verbose && echo "Pushing $f"
        case "$f" in
            /*) rsync "$f" "[${host}]:${f}" || rc=$? ;;
            *)  rsync "${PWD}/${f}" "[${host}]:${PWD}/${f}" || rc=$? ;;
        esac
    done

    return $rc
}
|
|---|
| 335 |
|
|---|
# Local stand-in for the remote shell: run a command string with sh,
# with CTDB_SOCKET set to the given "node" and descriptor 3 sent to
# /dev/null.
#
# Arguments: $1 - value for CTDB_SOCKET; $2 - command string.
# Returns:   the exit status of the command.
fakessh ()
{
    local socket="$1"
    local script="$2"

    CTDB_SOCKET="$socket" sh -c "$script" 3>/dev/null
}
|
|---|
| 340 |
|
|---|
# Filter for a node's standard output; reads standard input.
#   - With a non-empty $prefix the data is saved to "<prefix>.<node>",
#     any "/" in the node name being replaced by "_".
#   - Otherwise, when both verbose and parallel, each line is prefixed
#     with "[<node>] ".
#   - Otherwise the data is passed through unchanged.
# Globals read: prefix, n (current node), verbose, parallel.
stdout_filter ()
{
    if [ -n "$prefix" ] ; then
        cat >"${prefix}.${n//\//_}"
        return
    fi

    if ! { $verbose && $parallel ; } ; then
        cat
        return
    fi

    sed -e "s@^@[$n] @"
}
|
|---|
| 351 |
|
|---|
# Filter for a node's standard error; reads standard input.  When both
# verbose and parallel, each line is prefixed with "[<node>] ";
# otherwise the data is passed through unchanged.
# Globals read: n (current node), verbose, parallel.
stderr_filter ()
{
    if ! { $verbose && $parallel ; } ; then
        cat
        return
    fi

    sed -e "s@^@[$n] @"
}
|
|---|
| 360 |
|
|---|
| 361 | ######################################################################
|
|---|
| 362 |
|
|---|
parse_options "$@"

# -c only matters when a command is run (not when pushing files): make
# the command change to the current directory first.
if ! $push && $current ; then
    command="cd $PWD && $command"
fi

# Choose the transport used to reach each node.
ssh_opts=
if $push ; then
    # -P: copy files with the push function instead of running commands.
    SSH=push
    EXTRA_SSH_OPTS=""
elif [ -n "$CTDB_NODES_SOCKETS" ] ; then
    # Nodes are given as sockets: run commands through fakessh.
    SSH=fakessh
    EXTRA_SSH_OPTS=""
else
    # Could "2>/dev/null || true" but want to see errors from typos in file.
    if [ -r "${CTDB_BASE}/onnode.conf" ] ; then
        . "${CTDB_BASE}/onnode.conf"
    fi
    SSH="${SSH:-ssh}"
    # For ssh, pass -n when running in parallel or when -i was not given.
    # For anything else (rsh?) all bets are off, so no option is added.
    if [ "$SSH" = "ssh" ] && { $parallel || ! $stdin ; } ; then
        ssh_opts="-n"
    fi
fi
|
|---|
| 388 |
|
|---|
| 389 | ######################################################################
|
|---|
| 390 |
|
|---|
# Resolve the node specification.  get_nodes runs in a subshell, so an
# exit inside it has to be turned into an exit here.
nodes=$(get_nodes "$nodespec") || exit 1

if ! $quiet ; then
    # If $nodes contains a space or a newline then assume multiple nodes.
    nl="
"
    case "$nodes" in
        *" "*|*"$nl"*) verbose=true ;;
    esac
else
    # -q overrides -v.
    verbose=false
fi
|
|---|
| 402 |
|
|---|
pids=""
trap 'kill -TERM $pids 2>/dev/null' INT TERM
# There's a small race here where the kill can fail if no processes
# have been added to $pids and the script is interrupted.  However,
# the part of the window where it matters is very small.
retcode=0
for n in $nodes ; do
    # Make a failure of the remote command visible through the filters.
    set -o pipefail 2>/dev/null
    if $parallel ; then
        # Descriptor 3 carries the filtered stdout past the stderr filter.
        { exec 3>&1 ; { $SSH $ssh_opts $EXTRA_SSH_OPTS "$n" "$command" | stdout_filter >&3 ; } 2>&1 | stderr_filter ; } &
        pids="${pids} $!"
    else
        if $verbose ; then
            echo >&2 ; echo ">> NODE: $n <<" >&2
        fi

        { exec 3>&1 ; { $SSH $ssh_opts $EXTRA_SSH_OPTS "$n" "$command" | stdout_filter >&3 ; } 2>&1 | stderr_filter ; }
        # Save the status before testing it: "[" overwrites $?, so
        # reading $? afterwards would always record 1.
        rc=$?
        [ "$rc" -eq 0 ] || retcode=$rc
    fi
done

# In parallel mode collect the status of every background job.
$parallel && {
    for p in $pids; do
        wait "$p"
        rc=$?
        [ "$rc" -eq 0 ] || retcode=$rc
    done
}

exit $retcode
|
|---|