1 | #!/bin/bash
|
---|
2 |
|
---|
3 | # Run commands on CTDB nodes.
|
---|
4 |
|
---|
5 | # See http://ctdb.samba.org/ for more information about CTDB.
|
---|
6 |
|
---|
7 | # Copyright (C) Martin Schwenke 2008
|
---|
8 |
|
---|
9 | # Based on an earlier script by Andrew Tridgell and Ronnie Sahlberg.
|
---|
10 |
|
---|
11 | # Copyright (C) Andrew Tridgell 2007
|
---|
12 |
|
---|
13 | # This program is free software; you can redistribute it and/or modify
|
---|
14 | # it under the terms of the GNU General Public License as published by
|
---|
15 | # the Free Software Foundation; either version 3 of the License, or
|
---|
16 | # (at your option) any later version.
|
---|
17 |
|
---|
18 | # This program is distributed in the hope that it will be useful,
|
---|
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
21 | # GNU General Public License for more details.
|
---|
22 |
|
---|
23 | # You should have received a copy of the GNU General Public License
|
---|
24 | # along with this program; if not, see <http://www.gnu.org/licenses/>.
|
---|
25 |
|
---|
# Name this script was invoked as; used as the prefix of diagnostics.
# "$0" is quoted so that an install path containing whitespace is not
# word-split into several basename arguments.
prog=$(basename "$0")
27 |
|
---|
# Write the command-line synopsis to standard error and terminate the
# current (sub)shell with exit status 1.  Never returns.
usage ()
{
    cat <<EOF >&2
Usage: onnode [OPTION] ... <NODES> <COMMAND> ...
options:
-c Run in current working directory on specified nodes.
-f Specify nodes file, overrides CTDB_NODES_FILE.
-i Keep standard input open - the default is to close it.
-n Allow nodes to be specified by name.
-o <prefix> Save standard output from each node to file <prefix>.<ip>
-p Run command in parallel on specified nodes.
-P Push given files to nodes instead of running commands.
-q Do not print node addresses (overrides -v).
-v Print node address even for a single node.
<NODES> "all", "any", "ok" (or "healthy"), "con" (or "connected"),
"rm" (or "recmaster"), "lvs" (or "lvsmaster"),
"natgw" (or "natgwlist"); or
a node number (0 base); or
a hostname (if -n is specified); or
list (comma separated) of <NODES>; or
range (hyphen separated) of node numbers.
EOF

    exit 1
}
53 |
|
---|
# Report a malformed <NODES> argument on standard error, followed by a
# blank line, then hand over to usage (which prints the synopsis and
# exits).
invalid_nodespec ()
{
    {
        echo "Invalid <nodespec>"
        echo
    } >&2
    usage
}
59 |
|
---|
# Option defaults.  Each of these is overridden by parse_options when
# the matching command-line flag is given.
current=false      # -c
parallel=false     # -p
verbose=false      # -v
quiet=false        # -q
prefix=""          # -o <prefix>
names_ok=false     # -n
push=false         # -P
stdin=false        # -i

# Use the default configuration directory unless the caller supplied
# a non-empty CTDB_BASE.
: "${CTDB_BASE:=/usr/local/etc/ctdb}"

# Pull in the shared helper library and load the "ctdb" configuration.
# NOTE(review): loadconfig is presumably defined by the sourced
# functions file - it is not defined in this script.
. "${CTDB_BASE}/functions"
loadconfig "ctdb"
76 |
|
---|
# Parse onnode's own command-line options.
#
# Arguments: the script's full argument list ("$@").
# Globals written: current, CTDB_NODES_FILE, names_ok, prefix, parallel,
#   quiet, verbose, push, stdin (one per option), plus
#   nodespec - the first non-option argument (the <NODES> spec), and
#   command  - every remaining argument joined into one string.
# Calls usage (which exits) on a getopt error, on -h/--help, or when
# fewer than two non-option arguments remain.
parse_options ()
{
    # $POSIXLY_CORRECT means that the command passed to onnode can
    # take options and getopt won't reorder things to make them
    # options to onnode.
    local temp
    # Not on the previous line - local returns 0!
    temp=$(POSIXLY_CORRECT=1 getopt -n "$prog" -o "cf:hno:pqvPi" -l help -- "$@") || usage

    # getopt emits a shell-quoted argument list; eval restores it as
    # the positional parameters.
    eval set -- "$temp"

    while true ; do
        case "$1" in
            -c) current=true ; shift ;;
            -f) CTDB_NODES_FILE="$2" ; shift 2 ;;
            -n) names_ok=true ; shift ;;
            -o) prefix="$2" ; shift 2 ;;
            -p) parallel=true ; shift ;;
            -q) quiet=true ; shift ;;
            -v) verbose=true ; shift ;;
            -P) push=true ; shift ;;
            -i) stdin=true ; shift ;;
            --) shift ; break ;;
            -h|--help|*) usage ;; # Shouldn't happen, so this is reasonable.
        esac
    done

    [ $# -lt 2 ] && usage

    nodespec="$1" ; shift
    # The command is deliberately flattened into a single string (it is
    # handed to the remote shell as one argument), so use "$*" rather
    # than assigning the array-like "$@" to a scalar.
    command="$*"
}
111 |
|
---|
# Print the n-th (0-based) word of the remaining arguments.
#
# Arguments: $1 - index n; $2... - the list of node addresses.
# Outputs:   the selected address on stdout.
# Exits:     with status 1 (after a message on stderr) when n is not a
#            valid index, or when the selected slot is empty or holds
#            the "#DEAD" placeholder.
echo_nth ()
{
    local n="$1" ; shift

    # Only shift when n is a valid index.  "shift n" with n larger than
    # the argument count fails WITHOUT shifting, which would leave $1
    # pointing at the first node and silently return the wrong address.
    # The 2>/dev/null hides the test's complaint about a non-numeric n.
    local node=""
    if [ "$n" -ge 0 ] 2>/dev/null && [ "$n" -lt $# ] ; then
        shift "$n"
        node="$1"
    fi

    if [ -n "$node" ] && [ "$node" != "#DEAD" ] ; then
        echo "$node"
    else
        echo "${prog}: \"node ${n}\" does not exist" >&2
        exit 1
    fi
}
126 |
|
---|
# Expand a comma-separated <NODES> specification into one item per
# output line.
#
# Arguments: $1 - the node specification.
# Outputs:   for "a-b" items, the output of "seq a b"; keywords are
#            echoed unchanged; anything else is echoed if it is an
#            integer greater than -1 or if names are allowed
#            ($names_ok).
# Anything else goes to invalid_nodespec, which ends the subshell.
parse_nodespec ()
{
    # IFS is changed inside a subshell so it never has to be restored.
    (
        IFS=","
        for spec in $1 ; do
            case "$spec" in
                *-*)
                    # Hyphenated item: treat as a numeric range.
                    if ! seq "${spec%-*}" "${spec#*-}" 2>/dev/null ; then
                        invalid_nodespec
                    fi
                    ;;
                all|any|ok|healthy|con|connected|rm|recmaster|lvs|lvsmaster|natgw|natgwlist)
                    # Keyword: resolved later by get_nodes.
                    echo "$spec"
                    ;;
                *)
                    # Accept non-negative integers always, and
                    # arbitrary names only when -n was given.
                    if ! [ $spec -gt -1 ] 2>/dev/null && ! $names_ok ; then
                        invalid_nodespec
                    fi
                    echo $spec
                    ;;
            esac
        done
    )
}
145 |
|
---|
ctdb_status_output="" # cache
# Print the address of every node whose "ctdb -X status" record matches
# the requested state.
#
# Arguments: $1 - whitespace-separated list of all node addresses
#            $2 - "healthy" or "connected"
# Globals:   ctdb_status_output (read/written) - status text with its
#            first line removed, fetched at most once per shell.
# Exits:     with status 1 if "ctdb -X status" fails.
get_nodes_with_status ()
{
    local all_nodes="$1"
    local status="$2"

    if [ -z "$ctdb_status_output" ] ; then
        if ! ctdb_status_output=$(ctdb -X status 2>&1) ; then
            echo "${prog}: unable to get status of CTDB nodes" >&2
            echo "$ctdb_status_output" >&2
            exit 1
        fi
        # Drop the first line of the output (everything up to and
        # including the first newline).
        local nl=$'\n'
        ctdb_status_output="${ctdb_status_output#*${nl}}"
    fi

    (
        local record
        # Add "|" to the field separators; the subshell means IFS does
        # not need restoring afterwards.
        IFS="${IFS}|"
        while IFS="" read record ; do

            set -- $record  # split the record on "|"
            shift           # record starts with "|" so 1st field is empty
            local pnn="$1" ; shift
            local ip="$1" ; shift

            # The remaining positional parameters are the per-node flags.
            local wanted=true
            case "$status" in
                healthy)
                    # Any flag equal to 1 disqualifies this node.
                    local flag
                    for flag ; do
                        if [ "$flag" = "1" ] ; then
                            wanted=false
                            break
                        fi
                    done
                    ;;
                connected)
                    # Only the first flag (disconnected) must be 0.
                    [ "$1" = "0" ] || wanted=false
                    ;;
                *)
                    invalid_nodespec
                    ;;
            esac
            $wanted || continue

            echo_nth "$pnn" $all_nodes
        done <<<"$ctdb_status_output"
    )
}
194 |
|
---|
ctdb_props="" # cache
# Print the address of the node that currently holds a cluster role.
#
# Arguments: $1 - whitespace-separated list of all node addresses
#            $2 - ctdb sub-command naming the role ("recmaster",
#                 "lvsmaster" or "natgwlist" as used by get_nodes)
# Globals:   ctdb_props (read/written) - space-separated cache of
#            ":<prop>:<node>" entries.
# Exits:     with status 1 (after a message on stderr) when no node
#            number could be obtained for the property.
get_node_with_property ()
{
    local all_nodes="$1"
    local prop="$2"

    local prop_node=""
    # The leading "*" lets the lookup find an entry anywhere in the
    # cache.  Without it the pattern only matches at the very start of
    # the string, so every property except the first cached one is
    # re-queried (and re-appended) on each call.
    if [ "${ctdb_props##*:${prop}:}" = "$ctdb_props" ] ; then
        # Not in cache.
        prop_node=$(ctdb "$prop" -X 2>/dev/null)
        if [ $? -eq 0 ] ; then
            if [ "$prop" = "natgwlist" ] ; then
                prop_node="${prop_node%% *}" # 1st word
                if [ "$prop_node" = "-1" ] ; then
                    # This works around natgwlist returning 0 even
                    # when there's no natgw.
                    prop_node=""
                fi
            else
                # We only want the first line.
                local nl="
"
                prop_node="${prop_node%%${nl}*}"
            fi
        else
            prop_node=""
        fi

        if [ -n "$prop_node" ] ; then
            # Add to cache.
            ctdb_props="${ctdb_props}${ctdb_props:+ }:${prop}:${prop_node}"
        fi
    else
        # Get from cache: strip everything up to and including
        # ":<prop>:", then keep the first word.
        prop_node="${ctdb_props##*:${prop}:}"
        prop_node="${prop_node%% *}"
    fi

    if [ -n "$prop_node" ] ; then
        echo_nth "$prop_node" $all_nodes
    else
        echo "${prog}: No ${prop} available" >&2
        exit 1
    fi
}
240 |
|
---|
# Print the address of one node that answers "ctdb pnn".
#
# Arguments: $1 - whitespace-separated list of all node addresses.
# Outputs:   the address of the first node whose reply contains a
#            "PNN:<number>" line.
# Returns:   0 when such a node was found, 1 otherwise.
get_any_available_node ()
{
    local all_nodes="$1"

    # We do a recursive onnode to find which nodes are up and running.
    # "$0" is quoted so a script path containing whitespace still works.
    local out
    out=$("$0" -pq all ctdb pnn 2>&1)

    local line
    # -r keeps backslashes in error messages from joining or altering
    # lines.
    while read -r line ; do
        local pnn="${line#PNN:}"
        if [ "$pnn" != "$line" ] ; then
            # The prefix was present, so the rest is the node number.
            echo_nth "$pnn" $all_nodes
            return 0
        fi
        # Else must be an error message from a down node.
    done <<<"$out"
    return 1
}
258 |
|
---|
# Resolve a <NODES> specification to node addresses, one per line.
#
# Arguments: $1 - the node specification (see usage).
# Globals read: CTDB_NODES_SOCKETS, CTDB_NODES_FILE, CTDB_NODES,
#               CTDB_BASE, names_ok, prog.
# Outputs:   the selected node addresses on stdout.
# Exits:     with status 1 if the nodes file is unreadable, the
#            specification is invalid, or a selector cannot be resolved.
get_nodes ()
{
    local all_nodes

    if [ -n "$CTDB_NODES_SOCKETS" ] ; then
        all_nodes="$CTDB_NODES_SOCKETS"
    else
        local f="${CTDB_BASE}/nodes"
        if [ -n "$CTDB_NODES_FILE" ] ; then
            f="$CTDB_NODES_FILE"
            if [ ! -e "$f" ] && [ "${f#/}" = "$f" ] ; then
                # $f is relative, try in $CTDB_BASE
                f="${CTDB_BASE}/${f}"
            fi
        elif [ -n "$CTDB_NODES" ] ; then
            f="$CTDB_NODES"
        fi

        if [ ! -r "$f" ] ; then
            echo "${prog}: unable to open nodes file \"${f}\"" >&2
            exit 1
        fi

        # Strip comments and spaces; lines left empty become "#DEAD"
        # placeholders so the remaining entries keep their positions
        # (echo_nth rejects "#DEAD").
        all_nodes=$(sed -e 's@#.*@@g' -e 's@ *@@g' -e 's@^$@#DEAD@' "$f")
    fi

    # Expand the specification first and check its status directly.
    # Checking $? inside the loop body misses a parse_nodespec failure
    # that produced no output, because the loop then never runs.
    local specs
    specs=$(parse_nodespec "$1") || exit 1

    local n
    for n in $specs ; do
        case "$n" in
            all)
                echo "${all_nodes//#DEAD/}"
                ;;
            any)
                get_any_available_node "$all_nodes" || exit 1
                ;;
            ok|healthy)
                get_nodes_with_status "$all_nodes" "healthy" || exit 1
                ;;
            con|connected)
                get_nodes_with_status "$all_nodes" "connected" || exit 1
                ;;
            rm|recmaster)
                get_node_with_property "$all_nodes" "recmaster" || exit 1
                ;;
            lvs|lvsmaster)
                get_node_with_property "$all_nodes" "lvsmaster" || exit 1
                ;;
            natgw|natgwlist)
                get_node_with_property "$all_nodes" "natgwlist" || exit 1
                ;;
            [0-9]|[0-9][0-9]|[0-9][0-9][0-9])
                echo_nth $n $all_nodes
                ;;
            *)
                $names_ok || invalid_nodespec
                echo $n
        esac
    done
}
320 |
|
---|
# Copy files to a node with rsync; used in place of ssh when -P is
# given.
#
# Arguments: $1 - destination host
#            $2 - whitespace-separated list of files
# Absolute paths are copied to the same path on the host; relative
# paths are resolved against $PWD on both sides.
# Returns:   the status of the last rsync run.
push()
{
    local host="$1"
    local files="$2"

    local path
    # Unquoted on purpose: $files is a space-separated list.
    for path in $files ; do
        if $verbose ; then
            echo "Pushing $path"
        fi

        if [ "${path#/}" != "$path" ] ; then
            # Starts with "/": absolute path.
            rsync "$path" "[${host}]:${path}"
        else
            rsync "${PWD}/${path}" "[${host}]:${PWD}/${path}"
        fi
    done
}
335 |
|
---|
# Stand-in for ssh when nodes are given as local sockets
# (CTDB_NODES_SOCKETS): run the command locally with CTDB_SOCKET
# pointing at the node's socket.
#
# Arguments: $1 - socket path, exported to the command as CTDB_SOCKET
#            $2 - command string, run by "sh -c"
# Returns:   the command's exit status.
fakessh ()
{
    local socket="$1"
    local cmd="$2"

    # File descriptor 3 is sent to /dev/null for the child; the main
    # loop opens fd 3 for its own output plumbing.
    CTDB_SOCKET="$socket" sh -c "$cmd" 3>/dev/null
}
340 |
|
---|
# Filter for a node's standard output (reads stdin).
#
# Globals read: prefix, n (current node), verbose, parallel.
# With a non-empty $prefix the stream is saved to "<prefix>.<node>",
# with any "/" in the node replaced by "_".  Otherwise it is passed
# through, each line tagged with "[<node>] " when both verbose and
# parallel are set.
stdout_filter ()
{
    if [ -z "$prefix" ] ; then
        if $verbose && $parallel ; then
            sed -e "s@^@[$n] @"
        else
            cat
        fi
    else
        cat >"${prefix}.${n//\//_}"
    fi
}
351 |
|
---|
# Filter for a node's standard error (reads stdin).
#
# Globals read: n (current node), verbose, parallel.
# Lines are tagged with "[<node>] " only when both verbose and parallel
# are set; otherwise the stream is passed through unchanged.
stderr_filter ()
{
    if ! $verbose || ! $parallel ; then
        cat
    else
        sed -e "s@^@[$n] @"
    fi
}
360 |
|
---|
361 | ######################################################################
|
---|
362 |
|
---|
# Process the command line, then choose the transport ($SSH) and its
# options.
parse_options "$@"

ssh_opts=
if $push ; then
    # -P: "run" the push function instead of a remote shell.
    SSH=push
    EXTRA_SSH_OPTS=""
else
    # -c: make the remote command start in our working directory.
    if $current ; then
        command="cd $PWD && $command"
    fi

    if [ -z "$CTDB_NODES_SOCKETS" ] ; then
        # Could "2>/dev/null || true" but want to see errors from typos in file.
        if [ -r "${CTDB_BASE}/onnode.conf" ] ; then
            . "${CTDB_BASE}/onnode.conf"
        fi
        # onnode.conf may have chosen a different transport.
        : "${SSH:=ssh}"
        # Pass -n only to real ssh, and only when running in parallel
        # or when -i was not given.  rsh?  All bets are off!
        if [ "$SSH" = "ssh" ] ; then
            if $parallel || ! $stdin ; then
                ssh_opts="-n"
            fi
        fi
    else
        # Nodes are local sockets: run commands locally via fakessh.
        SSH=fakessh
        EXTRA_SSH_OPTS=""
    fi
fi
388 |
|
---|
389 | ######################################################################
|
---|
390 |
|
---|
# Resolve the node list, run the command (or push) on every node and
# exit with the status of the last node that failed (0 if none did).

# get_nodes runs in a subshell, so an "exit" inside it only shows up
# here as a non-zero status.
nodes=$(get_nodes "$nodespec") || exit 1

if $quiet ; then
    verbose=false
else
    # If $nodes contains a space or a newline then assume multiple nodes.
    nl="
"
    [ "$nodes" != "${nodes%[ ${nl}]*}" ] && verbose=true
fi

pids=""
trap 'kill -TERM $pids 2>/dev/null' INT TERM
# There's a small race here where the kill can fail if no processes
# have been added to $pids and the script is interrupted.  However,
# the part of the window where it matters is very small.
retcode=0

# With pipefail a failure of the remote command is not masked by the
# output filters at the end of each pipeline.
set -o pipefail 2>/dev/null

for n in $nodes ; do
    # In both branches fd 3 carries the node's filtered stdout to our
    # stdout, while its stderr is piped through stderr_filter.
    if $parallel ; then
        { exec 3>&1 ; { $SSH $ssh_opts $EXTRA_SSH_OPTS $n "$command" | stdout_filter >&3 ; } 2>&1 | stderr_filter ; } &
        pids="${pids} $!"
    else
        if $verbose ; then
            echo >&2 ; echo ">> NODE: $n <<" >&2
        fi

        { exec 3>&1 ; { $SSH $ssh_opts $EXTRA_SSH_OPTS $n "$command" | stdout_filter >&3 ; } 2>&1 | stderr_filter ; }
        # Save the status first: after a "[ ... ]" test $? holds the
        # test's own status, not the command's.
        rc=$?
        [ $rc = 0 ] || retcode=$rc
    fi
done

# Collect the background jobs started above.
if $parallel ; then
    for p in $pids ; do
        wait $p
        rc=$?
        [ $rc = 0 ] || retcode=$rc
    done
fi

exit $retcode