# Hey Emacs, this is a -*- shell-script -*- !!!  :-)

# Load common.sh from the test scripts directory.
. "${TEST_SCRIPTS_DIR}/common.sh"

######################################################################

export CTDB_TIMEOUT=60

# CTDB_TEST_WRAPPER is the path of the "test_wrap" script.  It lives in
# $CTDB_TEST_REMOTE_DIR when that is set, otherwise in the test scripts
# directory (resolved to an absolute path).
if [ -n "$CTDB_TEST_REMOTE_DIR" ] ; then
    CTDB_TEST_WRAPPER="${CTDB_TEST_REMOTE_DIR}/test_wrap"
else
    # Both expansions are quoted so that a directory name containing
    # whitespace or glob characters is passed through intact.
    _d=$(cd "${TEST_SCRIPTS_DIR}"; echo "$PWD")
    CTDB_TEST_WRAPPER="$_d/test_wrap"
fi
export CTDB_TEST_WRAPPER

# If $VALGRIND is set then use it whenever ctdb is called, but only if
# $CTDB is not already set.
[ -n "$CTDB" ] || export CTDB="${VALGRIND}${VALGRIND:+ }ctdb"

# why???
PATH="${TEST_SCRIPTS_DIR}:${PATH}"
23 |
|
---|
24 | ######################################################################
|
---|
25 |
|
---|
# Exit handler (installed on signal 0 by ctdb_test_init).
#
# Works out the final exit code, runs any registered exit hooks, then
# either restarts CTDB (restart scheduled or cluster unhealthy) or just
# forces a recovery, and finally exits with the computed code.
#
# Reads:  $testfailures, $ctdb_test_exit_hook,
#         $ctdb_test_restart_scheduled, $CTDB
# NOTE: the local must stay named "status": restart_ctdb assigns
# status=1 on failure and relies on dynamic scoping to reach it.
ctdb_test_exit ()
{
    # Must be the first statement so that $? is still the status the
    # shell had when the handler was entered.
    local status=$?

    # Do not run this handler a second time from the exit below.
    trap - 0

    # A non-zero exit status counts as a failure unless failures were
    # already recorded.
    if [ $(($testfailures+0)) -eq 0 ] && [ $status -ne 0 ] ; then
        testfailures=$status
    fi
    status=$(($testfailures+0))

    # Avoid making a test fail from this point onwards.  The test is
    # now complete.
    set +e

    echo "*** TEST COMPLETED (RC=$status) AT $(date '+%F %T'), CLEANING UP..."

    # Hook failures are deliberately ignored.
    eval "$ctdb_test_exit_hook" || true
    unset ctdb_test_exit_hook

    if $ctdb_test_restart_scheduled || ! cluster_is_healthy ; then
        restart_ctdb
    else
        # This could be made unconditional but then we might get
        # duplication from the recovery in restart_ctdb.  We want to
        # leave the recovery in restart_ctdb so that future tests that
        # might do a manual restart mid-test will benefit.
        echo "Forcing a recovery..."
        onnode 0 $CTDB recover
    fi

    exit $status
}
58 |
|
---|
# Append a command to $ctdb_test_exit_hook.
#
# All arguments are joined into a single command string.  Commands
# already in the hook are kept and separated from the new one by " ; ".
ctdb_test_exit_hook_add ()
{
    local new_cmd="$*"

    if [ -n "$ctdb_test_exit_hook" ] ; then
        ctdb_test_exit_hook="${ctdb_test_exit_hook} ; ${new_cmd}"
    else
        ctdb_test_exit_hook="${ctdb_test_exit_hook}${new_cmd}"
    fi
}
63 |
|
---|
# Initialise per-test state and install the exit handler.
#
# Sets: $scriptname (basename of $0), $testfailures (0),
#       $ctdb_test_restart_scheduled (false)
ctdb_test_init ()
{
    testfailures=0
    ctdb_test_restart_scheduled=false
    scriptname=$(basename "$0")

    # Run ctdb_test_exit whenever the shell exits.
    trap "ctdb_test_exit" 0
}
72 |
|
---|
73 | ########################################
|
---|
74 |
|
---|
# Run a command on the given node(s) via onnode and capture its output.
#
# Usage: try_command_on_node [-v] [onnode-options...] <nodespec> <command...>
#
# Leading arguments that start with "-" are options: "-v" makes this
# function print the captured output, anything else is handed to onnode.
# The remaining arguments are joined into one command string.
#
# Sets: $out (combined stdout and stderr of the onnode invocation)
# Returns 1, after printing a message and the output, if onnode fails.
try_command_on_node ()
{
    local verbose=false
    local onnode_opts=""
    local nodespec

    while : ; do
        nodespec="$1" ; shift
        case "$nodespec" in
            -v) verbose=true ;;
            -*) onnode_opts="${onnode_opts}${onnode_opts:+ }${nodespec}" ;;
            *)  break ;;
        esac
    done

    local cmd="$*"

    # $onnode_opts is intentionally unquoted: it holds several options.
    if ! out=$(onnode -q $onnode_opts "$nodespec" "$cmd" 2>&1) ; then
        echo "Failed to execute \"$cmd\" on node(s) \"$nodespec\""
        echo "$out"
        return 1
    fi

    if $verbose ; then
        echo "Output of \"$cmd\":"
        echo "$out"
    fi
}
106 |
|
---|
# Check that some command output has enough lines and that every line
# matches a regular expression.
#
# $1: minimum number of lines required
# $2: extended regular expression that every line must match (it should
#     be anchored as necessary)
# $3: the output to check
#
# Prints a short report.  Returns 0 if both checks pass, 1 otherwise.
sanity_check_output ()
{
    local min_lines="$1"
    local regexp="$2" # Should be anchored as necessary.
    local output="$3"

    local ret=0

    # printf rather than echo: output such as "-n" must be counted, not
    # interpreted as an option.
    local num_lines
    num_lines=$(printf '%s\n' "$output" | wc -l)
    # Some wc implementations pad the count; normalise it to a number.
    num_lines=$((num_lines + 0))
    echo "There are $num_lines lines of output"
    if [ "$num_lines" -lt "$min_lines" ] ; then
        echo "BAD: that's less than the required number (${min_lines})"
        ret=1
    fi

    local status=0
    local unexpected # local doesn't pass through status of command on RHS.
    # grep -E replaces the obsolescent egrep, which warns on stderr with
    # current GNU grep.
    unexpected=$(printf '%s\n' "$output" | grep -E -v -- "$regexp") || status=$?

    # Note that this is reversed: grep -v succeeding means that at least
    # one line did NOT match the regexp.
    if [ "$status" -eq 0 ] ; then
        echo "BAD: unexpected lines in output:"
        printf '%s\n' "$unexpected" | cat -A
        ret=1
    else
        echo "Output lines look OK"
    fi

    return $ret
}
137 |
|
---|
# Sanity check a list of public IP assignments.
#
# $1: list of "ip node" lines
#
# Succeeds (printing "OK") as soon as two consecutive lines name
# different nodes.  Fails if a line with node -1 is reached first, or
# if the list ends without two different nodes having been seen.
sanity_check_ips ()
{
    local ips="$1" # list of "ip node" lines

    echo "Sanity checking IPs..."

    local addr owner
    local last_owner=""
    while read addr owner ; do
        if [ "$owner" = "-1" ] ; then
            break
        fi

        # Still on the first line, or the same node as before: remember
        # the node and keep looking.
        if [ -z "$last_owner" ] || [ "$owner" = "$last_owner" ] ; then
            last_owner="$owner"
            continue
        fi

        echo "OK"
        return 0
    done <<<"$ips"

    echo "BAD: a node was -1 or IPs are only assigned to one node:"
    echo "$ips"
    echo "Are you running an old version of CTDB?"
    return 1
}
160 |
|
---|
# List the public IPs known to a node.
#
# $1: node to query
#
# Runs "$CTDB ip -X" on that node and keeps fields 2 and 3 of every
# line after the first.  The resulting "ip node" lines are left in $out
# by try_command_on_node.
all_ips_on_node()
{
    local awk_prog='NR > 1 { print $2, $3 }'
    local remote_cmd="$CTDB ip -X | awk -F'|' '${awk_prog}'"

    try_command_on_node $1 "$remote_cmd"
}
168 |
|
---|
# Pick a test node: the first node that hosts any public IP.
#
# Sets: $test_node      PNN of the selected node ("" if none)
#       $test_node_ips  space-separated IPs hosted by that node
#       $test_ip        first of those IPs
#       $test_prefix    $test_ip with /32 (IPv4) or /128 (IPv6) appended
#
# Returns 1 if the IP list cannot be retrieved or no node hosts an IP.
_select_test_node_and_ips ()
{
    test_node=""     # this matches no PNN
    test_node_ips=""
    test_ip=""
    test_prefix=""

    # If the command fails then $out holds an error message, not
    # "ip pnn" lines, so it must not be parsed.
    try_command_on_node any \
        "$CTDB ip -X all | awk -F'|' 'NR > 1 { print \$2, \$3 }'" || return 1

    local ip pnn
    while read ip pnn ; do
        # The first line whose node is not -1 decides the test node...
        if [ -z "$test_node" ] && [ "$pnn" != "-1" ] ; then
            test_node="$pnn"
        fi
        # ... and every IP on that node is collected.
        if [ "$pnn" = "$test_node" ] ; then
            test_node_ips="${test_node_ips}${test_node_ips:+ }${ip}"
        fi
    done <<<"$out" # bashism to avoid problem setting variable in pipeline.

    echo "Selected node ${test_node} with IPs: ${test_node_ips}."
    test_ip="${test_node_ips%% *}"

    case "$test_ip" in
        *:*) test_prefix="${test_ip}/128" ;;
        *)   test_prefix="${test_ip}/32"  ;;
    esac

    [ -n "$test_node" ] || return 1
}
196 |
|
---|
# Select a test node and its IPs, retrying until it works.
#
# Calls _select_test_node_and_ips repeatedly, pausing for a second
# (via sleep_for) between attempts.  Gives up and returns 1 once the
# retry budget of 10 is used up, i.e. after the 11th failed attempt.
select_test_node_and_ips ()
{
    local attempts_left=10

    until _select_test_node_and_ips ; do
        echo "Unable to find a test node with IPs assigned"
        if [ $attempts_left -le 0 ] ; then
            echo "BAD: Too many attempts"
            return 1
        fi
        sleep_for 1
        attempts_left=$((attempts_left - 1))
    done

    return 0
}
212 |
|
---|
# Find the interface and netmask length for $test_ip on $test_node.
#
# Sets: $iface  field 4 of the "$CTDB ip -v -X" line for $test_ip
#       $mask   when $TEST_LOCAL_DAEMONS is non-empty this is always 24;
#               otherwise it is cut out of "ip addr show to $test_ip"
#               (text after the last "/" up to the next space)
get_test_ip_mask_and_iface ()
{
    # Find the interface
    local awk_filter="awk -F'|' -v ip=$test_ip '\$2 == ip { print \$4 }'"
    try_command_on_node $test_node "$CTDB ip -v -X | ${awk_filter}"
    iface="$out"

    if [ -n "$TEST_LOCAL_DAEMONS" ] ; then
        mask="24"
    else
        # Find the netmask
        try_command_on_node $test_node ip addr show to $test_ip
        mask="${out##*/}"
        mask="${mask%% *}"
    fi

    echo "$test_ip/$mask is on $iface"
}
231 |
|
---|
# Collect the PNN reported by every node.
#
# Runs "$CTDB pnn" on all nodes with the "PNN:" prefix stripped.
# Sets: $all_pnns (and $out, via try_command_on_node)
ctdb_get_all_pnns ()
{
    local strip_prefix="sed -e 's@PNN:@@'"

    try_command_on_node -q all "$CTDB pnn | ${strip_prefix}"
    all_pnns="$out"
}
237 |
|
---|
# Delete a public IP from every node that has it configured.
#
# $1: the IP address to delete
#
# The subtlety is that "ctdb delip" will fail if the IP address isn't
# configured on a node, so the nodes that know the address are worked
# out first and "delip" is only run on those.
#
# Uses the non-local variables $_ip, $_nodes, $_pnn, $_i and $_n.
delete_ip_from_all_nodes ()
{
    _ip="$1"

    ctdb_get_all_pnns

    # Comma-separated list of the nodes that list $_ip.
    _nodes=""

    for _pnn in $all_pnns ; do
        all_ips_on_node $_pnn
        while read _i _n ; do
            [ "$_ip" = "$_i" ] || continue
            _nodes="${_nodes}${_nodes:+,}${_pnn}"
        done <<<"$out" # bashism
    done

    try_command_on_node -pq "$_nodes" "$CTDB delip $_ip"
}
259 |
|
---|
260 | #######################################
|
---|
261 |
|
---|
# Wait until either timeout expires or command succeeds.
#
# Usage: wait_until <timeout>[/<interval>] [!] <command> [args...]
#
# The command is retried every <interval> seconds (default 1) until it
# succeeds or <timeout> seconds have been counted down.  A leading "!"
# inverts the sense: wait until the command fails.
#
# Progress is printed as "<T|" followed by one dot per second waited.
# On success "|elapsed|" and "OK" are printed and 0 is returned; on
# timeout "*TIMEOUT*" is printed and 1 is returned.
#
# The local variable names are kept short and stable on purpose: the
# command may be a shell function and would see them.
wait_until ()
{
    local timeout="$1" ; shift # "$@" is the command...

    local interval=1
    if [[ "$timeout" == */* ]] ; then
        interval="${timeout#*/}"
        timeout="${timeout%/*}"
    fi

    local negate=false
    if [ "$1" = "!" ] ; then
        negate=true
        shift
    fi

    printf '<%s|' "$timeout"
    local t=$timeout
    while [ $t -gt 0 ] ; do
        local rc=0
        "$@" || rc=$?

        # Done when the command succeeded (normal mode) or failed
        # (negated mode).
        case "${negate}/${rc}" in
            false/0 | true/[!0]*)
                echo "|$(($timeout - $t))|"
                echo "OK"
                return 0
                ;;
        esac

        # One dot per second that is about to be slept.
        local i
        for ((i = 1; i <= interval; i++)) ; do
            printf '.'
        done
        t=$(($t - $interval))
        sleep $interval
    done

    echo "*TIMEOUT*"

    return 1
}
305 |
|
---|
# Sleep for $1 seconds, showing progress.
#
# Prints "=N|", then one dot per second slept, then "|" and a newline.
sleep_for ()
{
    # Keep the loop counter local so that a caller's $i (callers such
    # as restart_ctdb loop on $i themselves) is not overwritten.
    local i

    echo -n "=${1}|"
    for ((i = 1; i <= ${1:-0}; i++)) ; do
        echo -n '.'
        sleep 1
    done
    echo '|'
}
315 |
|
---|
# Succeed if "$CTDB nodestatus all" succeeds.
#
# Only the exit status is of interest, so standard output is thrown
# away.  $CTDB is left unquoted because it may hold a command prefix as
# well as the ctdb command itself.
_cluster_is_healthy ()
{
    $CTDB nodestatus all 1>/dev/null
}
320 |
|
---|
# Succeed if node_has_status reports the "recovered" status for "all".
_cluster_is_recovered ()
{
    set -- all recovered
    node_has_status "$@"
}
325 |
|
---|
# Succeed only if the cluster is both healthy and recovered.
#
# The recovery check is skipped when the health check fails, and the
# health check's status is returned in that case.
_cluster_is_ready ()
{
    _cluster_is_healthy || return
    _cluster_is_recovered
}
330 |
|
---|
# Report whether the cluster is healthy, as seen from node 0.
#
# Returns 0 and prints "Cluster is HEALTHY" (plus a warning if the
# cluster is in recovery) when the health check passes.  Otherwise
# prints "Cluster is UNHEALTHY" and returns 1; unless a restart is
# already scheduled, "ctdb status" and "ctdb scriptstatus" output is
# dumped first to help debugging.
cluster_is_healthy ()
{
    if ! onnode 0 $CTDB_TEST_WRAPPER _cluster_is_healthy ; then
        echo "Cluster is UNHEALTHY"
        if ! ${ctdb_test_restart_scheduled:-false} ; then
            echo "DEBUG AT $(date '+%F %T'):"
            local debug_cmd
            for debug_cmd in \
                "onnode -q 0 $CTDB status" \
                "onnode -q 0 onnode all $CTDB scriptstatus" ; do
                echo "$debug_cmd"
                # Unquoted so the string is split into a command line;
                # failures of the debug commands are ignored.
                $debug_cmd || true
            done
        fi
        return 1
    fi

    echo "Cluster is HEALTHY"
    onnode 0 $CTDB_TEST_WRAPPER _cluster_is_recovered || \
        echo "WARNING: cluster in recovery mode!"
    return 0
}
352 |
|
---|
# Wait for the cluster to become ready (healthy and recovered).
#
# $1: timeout passed to wait_until (default 120)
#
# Returns the status of wait_until.
wait_until_ready ()
{
    echo "Waiting for cluster to become ready..."

    wait_until ${1:-120} onnode -q any $CTDB_TEST_WRAPPER _cluster_is_ready
}
361 |
|
---|
# This function is becoming nicely overloaded.  Soon it will collapse!  :-)
#
# Check whether a node currently has the given status.
#
# $1: PNN of the node to check
# $2: one of: unhealthy healthy disconnected connected banned unbanned
#     disabled enabled stopped notstopped frozen unfrozen monon monoff
#     recovered
#
# Depending on the status one of 4 sources is consulted:
#   - flag columns of the node's line in "$CTDB -X status"
#   - the "frozen" line of "$CTDB statistics"
#   - "$CTDB getmonmode"
#   - "$CTDB status" ("recovered" means the recovery-mode line does
#     NOT say RECOVERY)
#
# Returns 0 if the node has the status, non-zero otherwise, including
# for an unknown status or when the status could not be retrieved.
node_has_status ()
{
    local pnn="$1"
    local status="$2"

    local bits fpat mpat rpat
    case "$status" in
        (unhealthy)    bits="?|?|?|1|*" ;;
        (healthy)      bits="?|?|?|0|*" ;;
        (disconnected) bits="1|*" ;;
        (connected)    bits="0|*" ;;
        (banned)       bits="?|1|*" ;;
        (unbanned)     bits="?|0|*" ;;
        (disabled)     bits="?|?|1|*" ;;
        (enabled)      bits="?|?|0|*" ;;
        (stopped)      bits="?|?|?|?|1|*" ;;
        (notstopped)   bits="?|?|?|?|0|*" ;;
        (frozen)       fpat='^[[:space:]]+frozen[[:space:]]+1$' ;;
        (unfrozen)     fpat='^[[:space:]]+frozen[[:space:]]+0$' ;;
        (monon)        mpat='^Monitoring mode:ACTIVE \(0\)$' ;;
        (monoff)       mpat='^Monitoring mode:DISABLED \(1\)$' ;;
        (recovered)    rpat='^Recovery mode:RECOVERY \(1\)$' ;;
        (*)
            echo "node_has_status: unknown status \"$status\""
            return 1
            ;;
    esac

    if [ -n "$bits" ] ; then
        local out x line

        out=$($CTDB -X status 2>&1) || return 1

        {
            # Skip the first line of output.
            read x
            while read line ; do
                # This needs to be done in 2 steps to avoid false matches.
                # Step 1: strip "|<pnn>|<next field>|"; a line for a
                # different node is left unchanged and skipped.
                local line_bits="${line#|${pnn}|*|}"
                [ "$line_bits" = "$line" ] && continue
                # Step 2: the remaining flag columns must start with
                # the wanted pattern.
                [ "${line_bits#${bits}}" != "$line_bits" ] && return 0
            done
            return 1
        } <<<"$out" # Yay bash!
    elif [ -n "$fpat" ] ; then
        # grep -E replaces the obsolescent egrep throughout.
        $CTDB statistics -n "$pnn" | grep -E -q "$fpat"
    elif [ -n "$mpat" ] ; then
        $CTDB getmonmode -n "$pnn" | grep -E -q "$mpat"
    elif [ -n "$rpat" ] ; then
        # The match is negated, so a failure of "ctdb status" must be
        # caught separately.  In a plain negated pipeline, a failed
        # command produces no match and would be reported as
        # "recovered".
        local status_out
        status_out=$($CTDB status -n "$pnn") || return 1
        ! grep -E -q "$rpat" <<<"$status_out"
    else
        echo 'node_has_status: unknown mode, neither $bits nor $fpat is set'
        return 1
    fi
}
416 |
|
---|
# Wait until a node has the given status.
#
# $1: PNN of the node to watch
# $2: status, as understood by node_has_status
# $3: timeout for wait_until (default 30)
# $4: node on which the check is run via onnode (default "any")
#
# On timeout, "ctdb status" and "ctdb scriptstatus" output is dumped
# and 1 is returned.
wait_until_node_has_status ()
{
    local pnn="$1"
    local status="$2"
    local timeout="${3:-30}"
    local proxy_pnn="${4:-any}"

    echo "Waiting until node $pnn has status \"$status\"..."

    if ! wait_until $timeout onnode $proxy_pnn $CTDB_TEST_WRAPPER node_has_status "$pnn" "$status" ; then
        # Local, as in cluster_is_healthy, so that a caller's $i
        # survives the debug loop.
        local i
        for i in "onnode -q any $CTDB status" "onnode -q any onnode all $CTDB scriptstatus" ; do
            echo "$i"
            # Unquoted so the string is split into a command line.
            $i || true
        done

        return 1
    fi

    return 0
}
436 |
|
---|
# Useful for superficially testing IP failover.
#
# Usage: ips_are_on_node [!] <node> <ip>...
#
# IPs must be on the given node.
# If the first argument is '!' then the IPs must not be on the given node.
#
# The check is made against the "ip node" list that all_ips_on_node
# fetches from the given node.  Every IP that is accounted for is
# removed from the list of IPs to check; the function succeeds when
# nothing is left.
ips_are_on_node ()
{
    local negating=false
    if [ "$1" = "!" ] ; then
        negating=true
        shift
    fi
    local node="$1" ; shift
    local ips="$*"

    # Local, so that the $out written on behalf of all_ips_on_node does
    # not overwrite the caller's $out.
    local out
    all_ips_on_node $node

    local wanted addr owner hosted_here
    for wanted in $ips ; do
        while read addr owner ; do
            if [ "$wanted" != "$addr" ] ; then
                # If we're negating and we didn't see the address then
                # it isn't hosted by anyone!
                if $negating ; then
                    ips="${ips/${wanted}}"
                fi
                continue
            fi

            # Found the address.  Fail when "hosted on the node" and
            # "negating" agree: the IP is on the node but must not be,
            # or it is elsewhere but must be on the node.
            if [ "$owner" = "$node" ] ; then
                hosted_here=true
            else
                hosted_here=false
            fi
            if [ "$hosted_here" = "$negating" ] ; then
                return 1
            fi

            ips="${ips/${addr}}" # Remove from list
            break
        done <<<"$out" # bashism to avoid problem setting variable in pipeline.
    done

    ips="${ips// }" # Remove any spaces.
    [ -z "$ips" ]
}
477 |
|
---|
# Wait (up to 60 seconds) until ips_are_on_node succeeds.
#
# Usage: wait_until_ips_are_on_node [!] <node> <ip>...
#
# The arguments are passed to ips_are_on_node unchanged.
wait_until_ips_are_on_node ()
{
    # Go to some trouble to print a useful description of what is happening
    local not=""
    [ "$1" != "!" ] || not="no longer "

    local node=""
    local ips=""
    local arg
    for arg in "$@" ; do
        case "$arg" in
            "!")
                # Not part of the description.
                ;;
            *)
                # The first remaining argument is the node, the rest
                # are IPs.
                if [ -z "$node" ] ; then
                    node="$arg"
                else
                    ips="${ips}${ips:+, }${arg}"
                fi
                ;;
        esac
    done
    echo "Waiting for ${ips} to ${not}be assigned to node ${node}"

    wait_until 60 ips_are_on_node "$@"
}
500 |
|
---|
# Succeed if at least one public IP is currently assigned to a node.
#
# $1: node to check
#
# Looks for a line naming the node in the "ip node" list that
# all_ips_on_node fetches from that node.
node_has_some_ips ()
{
    local node="$1"

    # All locals: neither the fetched list ($out) nor the loop
    # variables may overwrite the caller's variables of the same name.
    local out
    local ip pnn

    all_ips_on_node "$node"

    while read ip pnn ; do
        if [ "$node" = "$pnn" ] ; then
            return 0
        fi
    done <<<"$out" # bashism to avoid problem setting variable in pipeline.

    return 1
}
517 |
|
---|
# Wait (up to 60 seconds) until a node has at least one public IP.
#
# $1: node to check; defaults to $test_node when not given
#
# The message names the node that is actually checked.
wait_until_node_has_some_ips ()
{
    local node="${1:-${test_node}}"

    echo "Waiting for some IPs to be assigned to node ${node}"

    wait_until 60 node_has_some_ips "$node"
}
524 |
|
---|
525 | #######################################
|
---|
526 |
|
---|
# Translate selected ctdbd command-line options into environment
# variables.
#
# If "--start-as-stopped" appears anywhere in the arguments then
# CTDB_START_AS_STOPPED=yes is exported.  Other options are ignored.
_ctdb_hack_options ()
{
    local ctdb_options="$*"

    if [[ "$ctdb_options" == *--start-as-stopped* ]] ; then
        export CTDB_START_AS_STOPPED="yes"
    fi
}
536 |
|
---|
# Restart CTDB on the local machine.
#
# "$@" holds ctdbd options; they are only inspected by
# _ctdb_hack_options.  The restart uses "service ctdb restart" when
# /etc/redhat-release exists and the init script otherwise.
# Returns the status of the restart command.
restart_ctdb_1 ()
{
    _ctdb_hack_options "$@"

    if [ ! -e /etc/redhat-release ] ; then
        /etc/init.d/ctdb restart
        return
    fi

    service ctdb restart
}
547 |
|
---|
# Restart CTDB on all nodes.  Override for local daemons.
#
# Runs restart_ctdb_1 through the test wrapper on every node, via
# "onnode -p all".  "$@" is passed on to restart_ctdb_1.
_restart_ctdb_all ()
{
    set -- restart_ctdb_1 "$@"
    onnode -p all $CTDB_TEST_WRAPPER "$@"
}
553 |
|
---|
# Nothing needed for a cluster.  Override for local daemons.
#
# Always succeeds.
setup_ctdb ()
{
    return 0
}
559 |
|
---|
# Restart CTDB on all nodes and wait for the cluster to settle.
#
# "$@" is passed to _restart_ctdb_all.
#
# Up to 5 attempts are made.  An attempt succeeds when the restart
# works, the cluster becomes ready, a forced recovery completes and the
# cluster is still healthy afterwards.  Returns 0 on success.  After 5
# failed attempts debug output is printed, $status is set to 1 (this
# reaches the caller's variable of that name through dynamic scoping)
# and 1 is returned.
restart_ctdb ()
{
    echo -n "Restarting CTDB"
    # Default to "false" so that an unset flag is not run as an empty
    # command, which succeeds and would be read as "scheduled".
    if ${ctdb_test_restart_scheduled:-false} ; then
        echo -n " (scheduled)"
    fi
    echo "..."

    local i
    for i in 1 2 3 4 5 ; do
        _restart_ctdb_all "$@" || {
            echo "Restart failed. Trying again in a few seconds..."
            sleep_for 5
            continue
        }

        wait_until_ready || {
            echo "Cluster didn't become ready. Restarting..."
            continue
        }

        echo "Setting RerecoveryTimeout to 1"
        onnode -pq all "$CTDB setvar RerecoveryTimeout 1"

        # In recent versions of CTDB, forcing a recovery like this
        # blocks until the recovery is complete.  Hopefully this will
        # help the cluster to stabilise before a subsequent test.
        echo "Forcing a recovery..."
        onnode -q 0 $CTDB recover
        sleep_for 2

        if ! onnode -q any $CTDB_TEST_WRAPPER _cluster_is_recovered ; then
            echo "Cluster has gone into recovery again, waiting..."
            wait_until 30/2 onnode -q any $CTDB_TEST_WRAPPER _cluster_is_recovered
        fi

        # If the cluster is no longer healthy then start over.
        # Otherwise we're nearly done.
        if ! onnode 0 $CTDB_TEST_WRAPPER _cluster_is_healthy ; then
            echo "Cluster became UNHEALTHY again [$(date)]"
            # Use $CTDB, like every other invocation in this file.
            onnode -p all $CTDB status -X 2>&1
            onnode -p all $CTDB scriptstatus 2>&1
            echo "Restarting..."
            continue
        fi

        echo "Doing a sync..."
        onnode -q 0 $CTDB sync

        echo "ctdb is ready"
        return 0
    done

    echo "Cluster UNHEALTHY... too many attempts..."
    onnode -p all $CTDB status -X 2>&1
    onnode -p all $CTDB scriptstatus 2>&1

    # Try to make the calling test fail
    status=1
    return 1
}
623 |
|
---|
# Does nothing on cluster and should be overridden for local daemons
#
# Always succeeds.
maybe_stop_ctdb ()
{
    return 0
}
629 |
|
---|
# Schedule a CTDB restart for the end of the test.
#
# Only sets the $ctdb_test_restart_scheduled flag; the flag is acted
# upon by ctdb_test_exit.
ctdb_restart_when_done ()
{
    ctdb_test_restart_scheduled="true"
}
634 |
|
---|
# Print the value given to a ctdbd command-line option on a node.
#
# $1: node to query
# $2: option name, without the leading "--"
#
# The command line is taken from "ps" for the PID reported by
# "$CTDB getpid".  Both "--option=value" and "--option value" are
# understood.  An empty line is printed when the option is not on the
# command line; a longer option that merely starts with the same text
# is not mistaken for it.
#
# Calls die if the PID or the command line cannot be retrieved.
get_ctdbd_command_line_option ()
{
    local pnn="$1"
    local option="$2"

    try_command_on_node "$pnn" "$CTDB getpid" || \
        die "Unable to get PID of ctdbd on node $pnn"

    # The PID is whatever follows the first ":" in the output.
    local pid="${out#*:}"
    try_command_on_node "$pnn" "ps -p $pid -o args hww" || \
        die "Unable to get command-line of PID $pid"

    # Strip everything up to and including --option and its separator.
    # Without this check an absent option would leave the command line
    # untouched and its first word would be printed as the "value".
    local t
    case "$out" in
        *"--${option}="*) t="${out#*--${option}=}" ;;
        *"--${option} "*) t="${out#*--${option} }" ;;
        *)                t="" ;;
    esac

    # Strip any following options and print
    echo "${t%% -*}"
}
655 |
|
---|
656 | #######################################
|
---|
657 |
|
---|
# Wait for the "ctdb scriptstatus" output of a node to change.
#
# $1: node to watch
# $2: timeout for wait_until (default 120)
#
# The current scriptstatus output is recorded and then polled by
# _ctdb_scriptstatus_changed, which reads the locals $pnn and
# $ctdb_scriptstatus_original of this function through dynamic scoping.
# Returns 1 if the initial scriptstatus cannot be retrieved, otherwise
# the status of wait_until.
wait_for_monitor_event ()
{
    local pnn="$1"
    local timeout="${2:-120}"

    echo "Waiting for a monitor event on node ${pnn}..."

    try_command_on_node "$pnn" $CTDB scriptstatus || {
        echo "Unable to get scriptstatus from node $pnn"
        return 1
    }

    local ctdb_scriptstatus_original="$out"
    wait_until "$timeout" _ctdb_scriptstatus_changed
}
673 |
|
---|
# Succeed if the scriptstatus output of node $pnn differs from
# $ctdb_scriptstatus_original.
#
# Takes no arguments: $pnn and $ctdb_scriptstatus_original are expected
# to be set by the caller (wait_for_monitor_event does this).
# Returns 1, after printing a message, if scriptstatus cannot be
# retrieved.
_ctdb_scriptstatus_changed ()
{
    if ! try_command_on_node "$pnn" $CTDB scriptstatus ; then
        echo "Unable to get scriptstatus from node $pnn"
        return 1
    fi

    [ "$out" != "$ctdb_scriptstatus_original" ]
}
683 |
|
---|
684 | #######################################
|
---|
685 |
|
---|
# Prepare an NFS test: pick a test node, create a test directory in its
# first export and mount that export locally.
#
# Sets: $nfs_first_export  first export listed by showmount for $test_ip
#       $nfs_test_dir      directory created on the test node
#       $nfs_mnt_d         local mount point (fresh temporary directory)
#       $nfs_local_file    path of TEST_FILE below the local mount point
#       $nfs_remote_file   path of TEST_FILE on the test node
# Registers nfs_test_cleanup as an exit hook.
nfs_test_setup ()
{
    select_test_node_and_ips

    # Line 2 of the showmount output, cut at the first space.
    nfs_first_export=$(showmount -e $test_ip | sed -n '2s/ .*//p')

    echo "Creating test subdirectory..."
    try_command_on_node $test_node "mktemp -d --tmpdir=$nfs_first_export"
    nfs_test_dir="$out"
    try_command_on_node $test_node "chmod 777 $nfs_test_dir"

    nfs_remote_file="${nfs_test_dir}/TEST_FILE"
    nfs_mnt_d=$(mktemp -d)
    nfs_local_file="${nfs_mnt_d}/${nfs_test_dir##*/}/TEST_FILE"

    # Register the cleanup before mounting.
    ctdb_test_exit_hook_add nfs_test_cleanup

    echo "Mounting ${test_ip}:${nfs_first_export} on ${nfs_mnt_d} ..."
    mount -o timeo=1,hard,intr,vers=3 \
        "[${test_ip}]:${nfs_first_export}" ${nfs_mnt_d}
}
707 |
|
---|
# Undo nfs_test_setup.
#
# Removes the local test file, force-unmounts and removes the local
# mount point, then removes the test directory on the test node.
# Reads: $nfs_local_file, $nfs_mnt_d, $nfs_test_dir, $test_node
nfs_test_cleanup ()
{
    # Local side.
    rm -f "$nfs_local_file"
    umount -f "$nfs_mnt_d"
    rmdir "$nfs_mnt_d"

    # Remote side.
    onnode -q $test_node rmdir "$nfs_test_dir"
}
715 |
|
---|
716 | #######################################
|
---|
717 |
|
---|
# If the given IP is hosted then print: maskbits, iface and family
#
# $1: IP address (an address containing ":" is treated as IPv6)
#
# The interface is taken from the first line of "ip addr show" output
# and the mask length from each line whose first field matches "inet".
# Nothing is printed when "ip addr show" produces no output.
#
# Uses the non-local variables $_addr, $_family and $_bits.
ip_maskbits_iface ()
{
    _addr="$1"

    if [[ "$_addr" == *:* ]] ; then
        _family="inet6"
        _bits=128
    else
        _family="inet"
        _bits=32
    fi

    # Errors from ip are discarded: an address that is not configured
    # simply yields no output.
    ip addr show to "${_addr}/${_bits}" 2>/dev/null |
        awk -v family="${_family}" '
            NR == 1 {
                iface = $2
                sub(":$", "", iface)
            }
            $1 ~ /inet/ {
                mask = $2
                sub(".*/", "", mask)
                print mask, iface, family
            }'
}
734 |
|
---|
# Remove a public address from whichever local interface hosts it.
#
# $1: IP address, optionally followed by "/maskbits" (which is ignored;
#     the mask actually configured on the interface is used)
#
# Does nothing if ip_maskbits_iface reports that the address is not
# hosted.  Failure of "ip addr del" is deliberately ignored.
drop_ip ()
{
    local _addr="${1%/*}" # Remove optional maskbits
    local _maskbits _iface _rest

    # Only the first line reported by ip_maskbits_iface is used.
    read -r _maskbits _iface _rest <<<"$(ip_maskbits_iface "$_addr")"

    if [ -n "$_maskbits" ] ; then
        echo "Removing public address $_addr/$_maskbits from device $_iface"
        # Delete $_addr, the address that was looked up above.  $_ip is
        # not set by this function and must not be used here.
        ip addr del "$_addr/$_maskbits" dev "$_iface" >/dev/null 2>&1 || true
    fi
}
747 |
|
---|
# Remove each of the given public addresses using drop_ip.
#
# "$@": IP addresses, each optionally followed by "/maskbits"
#
# NOTE: the loop variable is deliberately the non-local $_ip, exactly
# as before; it stays visible to drop_ip while the loop runs.
drop_ips ()
{
    for _ip in "$@" ; do
        drop_ip "$_ip"
    done
}
754 |
|
---|
755 | #######################################
|
---|
756 |
|
---|
# Print the path of a database as reported by "ctdb getdbstatus".
#
# $1: pnn, $2: DB name
#
# The verbose output of try_command_on_node is filtered down to the
# text following "path: ".
db_get_path ()
{
    local node="$1"
    local db_name="$2"

    try_command_on_node -v $node $CTDB getdbstatus "$db_name" |
        sed -n 's@^path: @@p'
}
763 |
|
---|
# Print the number of records in a database, from "ctdb cattdb".
#
# $1: pnn, $2: DB name
#
# Counts the output lines that start with "key", leaving out the
# "__db_sequence_number__" record.
db_ctdb_cattdb_count_records ()
{
    local node="$1"
    local db_name="$2"

    try_command_on_node -v $node $CTDB cattdb "$db_name" |
        grep '^key' |
        grep -v '__db_sequence_number__' |
        wc -l
}
771 |
|
---|
# Store a record in a database file using "ctdb tstore".
#
# $1: pnn, $2: DB name, $3: key string, $4: value string, $5: RSN (default 7)
#
# The database file is looked up with db_get_path.
# Uses the non-local variables $_tdb and $_rsn.
db_ctdb_tstore ()
{
    _rsn="${5:-7}"
    _tdb=$(db_get_path $1 "$2")

    try_command_on_node $1 $CTDB tstore "$_tdb" "$3" "$4" "$_rsn"
}
779 |
|
---|
# Store a database sequence number record using db_ctdb_tstore.
#
# $1: pnn, $2: DB name, $3: dbseqnum (must be < 255!!!!!)
#
# Uses the non-local variables $_key and $_value.
db_ctdb_tstore_dbseqnum ()
{
    # Hex encoding of "__db_sequence_number__" + trailing 0x00
    _key='0x5f5f64625f73657175656e63655f6e756d6265725f5f00'

    # Construct 8 byte (uint64_t) database sequence number: $3 as the
    # first byte followed by 7 zero bytes.  This probably breaks if
    # $3 > 255
    printf -v _value '0x%02x%014x' $3 0

    db_ctdb_tstore $1 "$2" "$_key" "$_value"
}
792 |
|
---|
793 | #######################################
|
---|
794 |
|
---|
# Make sure that $CTDB is set.
: "${CTDB:=ctdb}"

# Load per-suite overrides, if the test subdirectory provides any.
local="${TEST_SUBDIR}/scripts/local.bash"
[ ! -r "$local" ] || . "$local"