Changeset 989 for vendor/current/ctdb
- Timestamp:
- Nov 25, 2016, 8:04:54 PM (9 years ago)
- Location:
- vendor/current/ctdb
- Files:
- 
      - 5 added
- 35 edited
 
 - 
          
  client/client.h (modified) (2 diffs)
- 
          
  client/client_control_sync.c (modified) (1 diff)
- 
          
  client/client_message.c (modified) (2 diffs)
- 
          
  common/pidfile.c (added)
- 
          
  common/pidfile.h (added)
- 
          
  common/rb_tree.c (modified) (3 diffs)
- 
          
  common/system_linux.c (modified) (4 diffs)
- 
          
  config/ctdb.service (modified) (1 diff)
- 
          
  config/ctdbd_wrapper (modified) (1 diff)
- 
          
  config/events.d/05.system (modified) (2 diffs)
- 
          
  doc/ctdb-tunables.7 (modified) (5 diffs)
- 
          
  doc/ctdb-tunables.7.html (modified) (3 diffs)
- 
          
  doc/ctdb-tunables.7.xml (modified) (9 diffs)
- 
          
  include/ctdb_private.h (modified) (2 diffs)
- 
          
  packaging/RPM/ctdb.spec.in (modified) (3 diffs)
- 
          
  protocol/protocol.h (modified) (8 diffs)
- 
          
  protocol/protocol_api.h (modified) (2 diffs)
- 
          
  protocol/protocol_client.c (modified) (1 diff)
- 
          
  protocol/protocol_control.c (modified) (12 diffs)
- 
          
  protocol/protocol_message.c (modified) (3 diffs)
- 
          
  protocol/protocol_private.h (modified) (1 diff)
- 
          
  protocol/protocol_types.c (modified) (2 diffs)
- 
          
  server/ctdb_control.c (modified) (1 diff)
- 
          
  server/ctdb_daemon.c (modified) (9 diffs)
- 
          
  server/ctdb_logging.c (modified) (1 diff)
- 
          
  server/ctdb_recover.c (modified) (4 diffs)
- 
          
  server/ctdb_recoverd.c (modified) (8 diffs)
- 
          
  server/ctdb_recovery_helper.c (modified) (50 diffs)
- 
          
  server/ctdb_takeover.c (modified) (26 diffs)
- 
          
  server/ctdb_tunables.c (modified) (2 diffs)
- 
          
  server/ctdbd.c (modified) (1 diff)
- 
          
  server/eventscript.c (modified) (4 diffs)
- 
          
  tests/cunit/pidfile_test_001.sh (added)
- 
          
  tests/simple/78_ctdb_large_db_recovery.sh (added)
- 
          
  tests/src/ctdbd_test.c (modified) (1 diff)
- 
          
  tests/src/pidfile_test.c (added)
- 
          
  tests/src/protocol_client_test.c (modified) (4 diffs)
- 
          
  tests/src/protocol_types_test.c (modified) (6 diffs)
- 
          
  utils/ping_pong/ping_pong.c (modified) (1 diff)
- 
          
  wscript (modified) (11 diffs)
 
Legend:
- Unmodified
- Added
- Removed
- 
      vendor/current/ctdb/client/client.hr988 r989 73 73 bool ctdb_client_message_recv(struct tevent_req *req, int *perr); 74 74 75 struct tevent_req *ctdb_client_message_multi_send( 76 TALLOC_CTX *mem_ctx, 77 struct tevent_context *ev, 78 struct ctdb_client_context *client, 79 uint32_t *pnn_list, int count, 80 struct ctdb_req_message *message); 81 82 bool ctdb_client_message_multi_recv(struct tevent_req *req, int *perr, 83 TALLOC_CTX *mem_ctx, int **perr_list); 84 75 85 int ctdb_client_message(TALLOC_CTX *mem_ctx, struct tevent_context *ev, 76 86 struct ctdb_client_context *client, 77 87 uint32_t destnode, struct ctdb_req_message *message); 88 89 struct tevent_req *ctdb_client_set_message_handler_send( 90 TALLOC_CTX *mem_ctx, 91 struct tevent_context *ev, 92 struct ctdb_client_context *client, 93 uint64_t srvid, 94 srvid_handler_fn handler, 95 void *private_data); 96 bool ctdb_client_set_message_handler_recv(struct tevent_req *req, int *perr); 97 98 struct tevent_req *ctdb_client_remove_message_handler_send( 99 TALLOC_CTX *mem_ctx, 100 struct tevent_context *ev, 101 struct ctdb_client_context *client, 102 uint64_t srvid, 103 void *private_data); 104 bool ctdb_client_remove_message_handler_recv(struct tevent_req *req, 105 int *perr); 78 106 79 107 int ctdb_client_set_message_handler(TALLOC_CTX *mem_ctx, … … 710 738 uint32_t db_id); 711 739 740 int ctdb_ctrl_db_pull(TALLOC_CTX *mem_ctx, struct tevent_context *ev, 741 struct ctdb_client_context *client, 742 int destnode, struct timeval timeout, 743 struct ctdb_pulldb_ext *pulldb, uint32_t *num_records); 744 745 int ctdb_ctrl_db_push_start(TALLOC_CTX *mem_ctx, struct tevent_context *ev, 746 struct ctdb_client_context *client, 747 int destnode, struct timeval timeout, 748 struct ctdb_pulldb_ext *pulldb); 749 750 int ctdb_ctrl_db_push_confirm(TALLOC_CTX *mem_ctx, struct tevent_context *ev, 751 struct ctdb_client_context *client, 752 int destnode, struct timeval timeout, 753 uint32_t db_id, uint32_t *num_records); 754 
712 755 /* from client/client_db.c */ 713 756 
- 
      vendor/current/ctdb/client/client_control_sync.cr988 r989 3118 3118 return 0; 3119 3119 } 3120 3121 int ctdb_ctrl_db_pull(TALLOC_CTX *mem_ctx, struct tevent_context *ev, 3122 struct ctdb_client_context *client, 3123 int destnode, struct timeval timeout, 3124 struct ctdb_pulldb_ext *pulldb, uint32_t *num_records) 3125 { 3126 struct ctdb_req_control request; 3127 struct ctdb_reply_control *reply; 3128 int ret; 3129 3130 ctdb_req_control_db_pull(&request, pulldb); 3131 ret = ctdb_client_control(mem_ctx, ev, client, destnode, timeout, 3132 &request, &reply); 3133 if (ret != 0) { 3134 DEBUG(DEBUG_ERR, 3135 ("Control DB_PULL failed to node %u, ret=%d\n", 3136 destnode, ret)); 3137 return ret; 3138 } 3139 3140 ret = ctdb_reply_control_db_pull(reply, num_records); 3141 if (ret != 0) { 3142 DEBUG(DEBUG_ERR, ("Control DB_PULL failed, ret=%d\n", ret)); 3143 return ret; 3144 } 3145 3146 return 0; 3147 } 3148 3149 int ctdb_ctrl_db_push_start(TALLOC_CTX *mem_ctx, struct tevent_context *ev, 3150 struct ctdb_client_context *client, 3151 int destnode, struct timeval timeout, 3152 struct ctdb_pulldb_ext *pulldb) 3153 { 3154 struct ctdb_req_control request; 3155 struct ctdb_reply_control *reply; 3156 int ret; 3157 3158 ctdb_req_control_db_push_start(&request, pulldb); 3159 ret = ctdb_client_control(mem_ctx, ev, client, destnode, timeout, 3160 &request, &reply); 3161 if (ret != 0) { 3162 DEBUG(DEBUG_ERR, 3163 ("Control DB_PUSH failed to node %u, ret=%d\n", 3164 destnode, ret)); 3165 return ret; 3166 } 3167 3168 ret = ctdb_reply_control_db_push_start(reply); 3169 if (ret != 0) { 3170 DEBUG(DEBUG_ERR, 3171 ("Control DB_PUSH failed, ret=%d\n", ret)); 3172 return ret; 3173 } 3174 3175 return 0; 3176 } 3177 3178 int ctdb_ctrl_db_push_confirm(TALLOC_CTX *mem_ctx, struct tevent_context *ev, 3179 struct ctdb_client_context *client, 3180 int destnode, struct timeval timeout, 3181 uint32_t db_id, uint32_t *num_records) 3182 { 3183 struct ctdb_req_control request; 3184 struct 
ctdb_reply_control *reply; 3185 int ret; 3186 3187 ctdb_req_control_db_push_confirm(&request, db_id); 3188 ret = ctdb_client_control(mem_ctx, ev, client, destnode, timeout, 3189 &request, &reply); 3190 if (ret != 0) { 3191 DEBUG(DEBUG_ERR, 3192 ("Control DB_PUSH failed to node %u, ret=%d\n", 3193 destnode, ret)); 3194 return ret; 3195 } 3196 3197 ret = ctdb_reply_control_db_push_confirm(reply, num_records); 3198 if (ret != 0) { 3199 DEBUG(DEBUG_ERR, 3200 ("Control DB_PUSH failed, ret=%d\n", ret)); 3201 return ret; 3202 } 3203 3204 return 0; 3205 } 
- 
      vendor/current/ctdb/client/client_message.cr988 r989 158 158 159 159 /* 160 * Handle multiple nodes 161 */ 162 163 struct ctdb_client_message_multi_state { 164 uint32_t *pnn_list; 165 int count; 166 int done; 167 int err; 168 int *err_list; 169 }; 170 171 struct message_index_state { 172 struct tevent_req *req; 173 int index; 174 }; 175 176 static void ctdb_client_message_multi_done(struct tevent_req *subreq); 177 178 struct tevent_req *ctdb_client_message_multi_send( 179 TALLOC_CTX *mem_ctx, 180 struct tevent_context *ev, 181 struct ctdb_client_context *client, 182 uint32_t *pnn_list, int count, 183 struct ctdb_req_message *message) 184 { 185 struct tevent_req *req, *subreq; 186 struct ctdb_client_message_multi_state *state; 187 int i; 188 189 if (pnn_list == NULL || count == 0) { 190 return NULL; 191 } 192 193 req = tevent_req_create(mem_ctx, &state, 194 struct ctdb_client_message_multi_state); 195 if (req == NULL) { 196 return NULL; 197 } 198 199 state->pnn_list = pnn_list; 200 state->count = count; 201 state->done = 0; 202 state->err = 0; 203 state->err_list = talloc_zero_array(state, int, count); 204 if (tevent_req_nomem(state->err_list, req)) { 205 return tevent_req_post(req, ev); 206 } 207 208 for (i=0; i<count; i++) { 209 struct message_index_state *substate; 210 211 subreq = ctdb_client_message_send(state, ev, client, 212 pnn_list[i], message); 213 if (tevent_req_nomem(subreq, req)) { 214 return tevent_req_post(req, ev); 215 } 216 217 substate = talloc(subreq, struct message_index_state); 218 if (tevent_req_nomem(substate, req)) { 219 return tevent_req_post(req, ev); 220 } 221 222 substate->req = req; 223 substate->index = i; 224 225 tevent_req_set_callback(subreq, ctdb_client_message_multi_done, 226 substate); 227 } 228 229 return req; 230 } 231 232 static void ctdb_client_message_multi_done(struct tevent_req *subreq) 233 { 234 struct message_index_state *substate = tevent_req_callback_data( 235 subreq, struct message_index_state); 236 struct 
tevent_req *req = substate->req; 237 int idx = substate->index; 238 struct ctdb_client_message_multi_state *state = tevent_req_data( 239 req, struct ctdb_client_message_multi_state); 240 bool status; 241 int ret; 242 243 status = ctdb_client_message_recv(subreq, &ret); 244 TALLOC_FREE(subreq); 245 if (! status) { 246 if (state->err == 0) { 247 state->err = ret; 248 state->err_list[idx] = state->err; 249 } 250 } 251 252 state->done += 1; 253 254 if (state->done == state->count) { 255 tevent_req_done(req); 256 } 257 } 258 259 bool ctdb_client_message_multi_recv(struct tevent_req *req, int *perr, 260 TALLOC_CTX *mem_ctx, int **perr_list) 261 { 262 struct ctdb_client_message_multi_state *state = tevent_req_data( 263 req, struct ctdb_client_message_multi_state); 264 int err; 265 266 if (tevent_req_is_unix_error(req, &err)) { 267 if (perr != NULL) { 268 *perr = err; 269 } 270 if (perr_list != NULL) { 271 *perr_list = talloc_steal(mem_ctx, state->err_list); 272 } 273 return false; 274 } 275 276 if (perr != NULL) { 277 *perr = state->err; 278 } 279 280 if (perr_list != NULL) { 281 *perr_list = talloc_steal(mem_ctx, state->err_list); 282 } 283 284 if (state->err != 0) { 285 return false; 286 } 287 288 return true; 289 } 290 291 /* 160 292 * sync version of message send 161 293 */ … … 191 323 talloc_free(tmp_ctx); 192 324 return 0; 325 } 326 327 struct ctdb_client_set_message_handler_state { 328 struct ctdb_client_context *client; 329 uint64_t srvid; 330 srvid_handler_fn handler; 331 void *private_data; 332 }; 333 334 static void ctdb_client_set_message_handler_done(struct tevent_req *subreq); 335 336 struct tevent_req *ctdb_client_set_message_handler_send( 337 TALLOC_CTX *mem_ctx, 338 struct tevent_context *ev, 339 struct ctdb_client_context *client, 340 uint64_t srvid, 341 srvid_handler_fn handler, 342 void *private_data) 343 { 344 struct tevent_req *req, *subreq; 345 struct ctdb_client_set_message_handler_state *state; 346 struct ctdb_req_control request; 347 348 req = 
tevent_req_create(mem_ctx, &state, 349 struct ctdb_client_set_message_handler_state); 350 if (req == NULL) { 351 return NULL; 352 } 353 354 state->client = client; 355 state->srvid = srvid; 356 state->handler = handler; 357 state->private_data = private_data; 358 359 ctdb_req_control_register_srvid(&request, srvid); 360 subreq = ctdb_client_control_send(state, ev, client, client->pnn, 361 tevent_timeval_zero(), &request); 362 if (tevent_req_nomem(subreq, req)) { 363 return tevent_req_post(req, ev); 364 } 365 tevent_req_set_callback(subreq, ctdb_client_set_message_handler_done, 366 req); 367 368 return req; 369 } 370 371 static void ctdb_client_set_message_handler_done(struct tevent_req *subreq) 372 { 373 struct tevent_req *req = tevent_req_callback_data( 374 subreq, struct tevent_req); 375 struct ctdb_client_set_message_handler_state *state = tevent_req_data( 376 req, struct ctdb_client_set_message_handler_state); 377 struct ctdb_reply_control *reply; 378 bool status; 379 int ret; 380 381 status = ctdb_client_control_recv(subreq, &ret, state, &reply); 382 TALLOC_FREE(subreq); 383 if (! 
status) { 384 tevent_req_error(req, ret); 385 return; 386 } 387 388 ret = ctdb_reply_control_register_srvid(reply); 389 talloc_free(reply); 390 if (ret != 0) { 391 tevent_req_error(req, ret); 392 return; 393 } 394 395 ret = srvid_register(state->client->srv, state->client, state->srvid, 396 state->handler, state->private_data); 397 if (ret != 0) { 398 tevent_req_error(req, ret); 399 return; 400 } 401 402 tevent_req_done(req); 403 } 404 405 bool ctdb_client_set_message_handler_recv(struct tevent_req *req, int *perr) 406 { 407 int err; 408 409 if (tevent_req_is_unix_error(req, &err)) { 410 if (perr != NULL) { 411 *perr = err; 412 } 413 return false; 414 } 415 return true; 416 } 417 418 struct ctdb_client_remove_message_handler_state { 419 struct ctdb_client_context *client; 420 uint64_t srvid; 421 void *private_data; 422 }; 423 424 static void ctdb_client_remove_message_handler_done(struct tevent_req *subreq); 425 426 struct tevent_req *ctdb_client_remove_message_handler_send( 427 TALLOC_CTX *mem_ctx, 428 struct tevent_context *ev, 429 struct ctdb_client_context *client, 430 uint64_t srvid, 431 void *private_data) 432 { 433 struct tevent_req *req, *subreq; 434 struct ctdb_client_remove_message_handler_state *state; 435 struct ctdb_req_control request; 436 437 req = tevent_req_create(mem_ctx, &state, 438 struct ctdb_client_remove_message_handler_state); 439 if (req == NULL) { 440 return NULL; 441 } 442 443 state->client = client; 444 state->srvid = srvid; 445 state->private_data = private_data; 446 447 ctdb_req_control_deregister_srvid(&request, srvid); 448 subreq = ctdb_client_control_send(state, ev, client, client->pnn, 449 tevent_timeval_zero(), &request); 450 if (tevent_req_nomem(subreq, req)) { 451 return tevent_req_post(req, ev); 452 } 453 tevent_req_set_callback(subreq, 454 ctdb_client_remove_message_handler_done, req); 455 456 return req; 457 } 458 459 static void ctdb_client_remove_message_handler_done(struct tevent_req *subreq) 460 { 461 struct tevent_req 
*req = tevent_req_callback_data( 462 subreq, struct tevent_req); 463 struct ctdb_client_remove_message_handler_state *state = tevent_req_data( 464 req, struct ctdb_client_remove_message_handler_state); 465 struct ctdb_reply_control *reply; 466 bool status; 467 int ret; 468 469 status = ctdb_client_control_recv(subreq, &ret, state, &reply); 470 TALLOC_FREE(subreq); 471 if (! status) { 472 tevent_req_error(req, ret); 473 return; 474 } 475 476 ret = ctdb_reply_control_deregister_srvid(reply); 477 talloc_free(reply); 478 if (ret != 0) { 479 tevent_req_error(req, ret); 480 return; 481 } 482 483 ret = srvid_deregister(state->client->srv, state->srvid, 484 state->private_data); 485 if (ret != 0) { 486 tevent_req_error(req, ret); 487 return; 488 } 489 490 tevent_req_done(req); 491 } 492 493 bool ctdb_client_remove_message_handler_recv(struct tevent_req *req, int *perr) 494 { 495 int err; 496 497 if (tevent_req_is_unix_error(req, &err)) { 498 if (perr != NULL) { 499 *perr = err; 500 } 501 return false; 502 } 503 return true; 193 504 } 194 505 
- 
      vendor/current/ctdb/common/rb_tree.c r988 r989 217 217 static inline void trbt_set_color(trbt_node_t *node, int color) 218 218 { 219 if ( (node==NULL) && (color==TRBT_BLACK)) { 219 if (node == NULL) { 220 220 return; 221 221 } … … 224 224 static inline void trbt_set_color_left(trbt_node_t *node, int color) 225 225 { 226 if ( ((node==NULL)||(node->left==NULL)) && (color==TRBT_BLACK)) { 226 if (node == NULL || node->left == NULL) { 227 227 return; 228 228 } … … 231 231 static inline void trbt_set_color_right(trbt_node_t *node, int color) 232 232 { 233 if ( ((node==NULL)||(node->right==NULL)) && (color==TRBT_BLACK)) { 233 if (node == NULL || node->right == NULL) { 234 234 return; 235 235 } 
- 
      vendor/current/ctdb/common/system_linux.c r988 r989 116 116 117 117 /* get the mac address */ 118 strncpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name)-1); 118 strlcpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name)); 119 119 ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr); 120 120 if ( ret < 0 ) { … … 202 202 203 203 DEBUG(DEBUG_DEBUG, (__location__ " Created SOCKET FD:%d for sending arp\n", s)); 204 strncpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name)); 204 strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name)); 205 205 if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) { 206 206 DEBUG(DEBUG_CRIT,(__location__ " interface '%s' not found\n", iface)); … … 210 210 211 211 /* get the mac address */ 212 strncpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name)-1); 212 strlcpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name)); 213 213 ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr); 214 214 if ( ret < 0 ) { … … 589 589 } 590 590 591 strncpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name)-1); 591 strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name)); 592 592 if (ioctl(s, SIOCGIFINDEX, &ifr) < 0 && errno == ENODEV) { 593 593 DEBUG(DEBUG_CRIT,(__location__ " interface '%s' not found\n", iface)); 
- 
      vendor/current/ctdb/config/ctdb.service r988 r989 1 1 [Unit] 2 2 Description=CTDB 3 After=network.target 3 After=network-online.target time-sync.target 4 4 5 5 [Service] 
- 
      vendor/current/ctdb/config/ctdbd_wrapper r988 r989 182 182 if [ -n "$_session" ] ; then 183 183 pkill -9 -s "$_session" 2>/dev/null 184 rm -f "$pidfile" 185 184 fi 186 185 } 
- 
      vendor/current/ctdb/config/events.d/05.system r988 r989 48 48 fi 49 49 50 if validate_percentage "$_warn_threshold" "$_what" ; then 50 if validate_percentage "$_warn_threshold" "$_thing" ; then 51 51 if [ "$_usage" -ge "$_warn_threshold" ] ; then 52 52 if [ -r "$_cache" ] ; then … … 145 145 END { 146 146 if (memavail != 0) { memfree = memavail ; } 147 print int((memtotal - memfree) / memtotal * 100), 148 int((swaptotal - swapfree) / swaptotal * 100) 147 if (memtotal != 0) { print int((memtotal - memfree) / memtotal * 100) ; } else { print 0 ; } 148 if (swaptotal != 0) { print int((swaptotal - swapfree) / swaptotal * 100) ; } else { print 0 ; } 149 149 }') 150 150 _mem_usage="$1" 
- 
      vendor/current/ctdb/doc/ctdb-tunables.7 r988 r989 3 3 .\" Author: 4 4 .\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/> 5 .\" Date: 01/27/2016 5 .\" Date: 09/22/2016 6 6 .\" Manual: CTDB - clustered TDB database 7 7 .\" Source: ctdb 8 8 .\" Language: English 9 9 .\" 10 .TH "CTDB\-TUNABLES" "7" "01/27/2016" "ctdb" "CTDB \- clustered TDB database" 10 .TH "CTDB\-TUNABLES" "7" "09/22/2016" "ctdb" "CTDB \- clustered TDB database" 11 11 .\" ----------------------------------------------------------------- 12 12 .\" * Define some portability stuff … … 38 38 \fBgetvar\fR 39 39 commands for more details\&. 40 .SS "MaxRedirectCount" 40 .PP 41 The tunable variables are listed alphabetically\&. 42 .SS "AllowClientDBAttach" 43 .PP 44 Default: 1 45 .PP 46 When set to 0, clients are not allowed to attach to any databases\&. This can be used to temporarily block any new processes from attaching to and accessing the databases\&. This is mainly used for detaching a volatile database using \*(Aqctdb detach\*(Aq\&. 47 .SS "AllowUnhealthyDBRead" 48 .PP 49 Default: 0 50 .PP 51 When set to 1, ctdb allows database traverses to read unhealthy databases\&. By default, ctdb does not allow reading records from unhealthy databases\&. 52 .SS "ControlTimeout" 53 .PP 54 Default: 60 55 .PP 56 This is the default setting for timeout for when sending a control message to either the local or a remote ctdb daemon\&. 57 .SS "DatabaseHashSize" 58 .PP 59 Default: 100001 60 .PP 61 Number of the hash chains for the local store of the tdbs that ctdb manages\&. 62 .SS "DatabaseMaxDead" 63 .PP 64 Default: 5 65 .PP 66 Maximum number of dead records per hash chain for the tdb databses managed by ctdb\&. 67 .SS "DBRecordCountWarn" 68 .PP 69 Default: 100000 70 .PP 71 When set to non\-zero, ctdb will log a warning during recovery if a database has more than this many records\&. This will produce a warning if a database grows uncontrollably with orphaned records\&. 
72 .SS "DBRecordSizeWarn" 73 .PP 74 Default: 10000000 75 .PP 76 When set to non\-zero, ctdb will log a warning during recovery if a single record is bigger than this size\&. This will produce a warning if a database record grows uncontrollably\&. 77 .SS "DBSizeWarn" 78 .PP 79 Default: 1000000000 80 .PP 81 When set to non\-zero, ctdb will log a warning during recovery if a database size is bigger than this\&. This will produce a warning if a database grows uncontrollably\&. 82 .SS "DeferredAttachTO" 83 .PP 84 Default: 120 85 .PP 86 When databases are frozen we do not allow clients to attach to the databases\&. Instead of returning an error immediately to the client, the attach request from the client is deferred until the database becomes available again at which stage we respond to the client\&. 87 .PP 88 This timeout controls how long we will defer the request from the client before timing it out and returning an error to the client\&. 89 .SS "DeterministicIPs" 90 .PP 91 Default: 0 92 .PP 93 When set to 1, ctdb will try to keep public IP addresses locked to specific nodes as far as possible\&. This makes it easier for debugging since you can know that as long as all nodes are healthy public IP X will always be hosted by node Y\&. 94 .PP 95 The cost of using deterministic IP address assignment is that it disables part of the logic where ctdb tries to reduce the number of public IP assignment changes in the cluster\&. This tunable may increase the number of IP failover/failbacks that are performed on the cluster by a small margin\&. 96 .SS "DisableIPFailover" 97 .PP 98 Default: 0 99 .PP 100 When set to non\-zero, ctdb will not perform failover or failback\&. Even if a node fails while holding public IPs, ctdb will not recover the IPs or assign them to another node\&. 101 .PP 102 When this tunable is enabled, ctdb will no longer attempt to recover the cluster by failing IP addresses over to other nodes\&. 
This leads to a service outage until the administrator has manually performed IP failover to replacement nodes using the \*(Aqctdb moveip\*(Aq command\&. 103 .SS "ElectionTimeout" 41 104 .PP 42 105 Default: 3 43 106 .PP 44 If we are not the DMASTER and need to fetch a record across the network we first send the request to the LMASTER after which the record is passed onto the current DMASTER\&. If the DMASTER changes before the request has reached that node, the request will be passed onto the "next" DMASTER\&. For very hot records that migrate rapidly across the cluster this can cause a request to "chase" the record for many hops before it catches up with the record\&. this is how many hops we allow trying to chase the DMASTER before we switch back to the LMASTER again to ask for new directions\&. 45 .PP 46 When chasing a record, this is how many hops we will chase the record for before going back to the LMASTER to ask for new guidance\&. 47 .SS "SeqnumInterval" 107 The number of seconds to wait for the election of recovery master to complete\&. If the election is not completed during this interval, then that round of election fails and ctdb starts a new election\&. 108 .SS "EnableBans" 109 .PP 110 Default: 1 111 .PP 112 This parameter allows ctdb to ban a node if the node is misbehaving\&. 113 .PP 114 When set to 0, this disables banning completely in the cluster and thus nodes can not get banned, even it they break\&. Don\*(Aqt set to 0 unless you know what you are doing\&. You should set this to the same value on all nodes to avoid unexpected behaviour\&. 115 .SS "EventScriptTimeout" 116 .PP 117 Default: 30 118 .PP 119 Maximum time in seconds to allow an event to run before timing out\&. This is the total time for all enabled scripts that are run for an event, not just a single event script\&. 120 .PP 121 Note that timeouts are ignored for some events ("takeip", "releaseip", "startrecovery", "recovered") and converted to success\&. 
The logic here is that the callers of these events implement their own additional timeout\&. 122 .SS "FetchCollapse" 123 .PP 124 Default: 1 125 .PP 126 This parameter is used to avoid multiple migration requests for the same record from a single node\&. All the record requests for the same record are queued up and processed when the record is migrated to the current node\&. 127 .PP 128 When many clients across many nodes try to access the same record at the same time this can lead to a fetch storm where the record becomes very active and bounces between nodes very fast\&. This leads to high CPU utilization of the ctdbd daemon, trying to bounce that record around very fast, and poor performance\&. This can improve performance and reduce CPU utilization for certain workloads\&. 129 .SS "HopcountMakeSticky" 130 .PP 131 Default: 50 132 .PP 133 For database(s) marked STICKY (using \*(Aqctdb setdbsticky\*(Aq), any record that is migrating so fast that hopcount exceeds this limit is marked as STICKY record for 134 \fIStickyDuration\fR 135 seconds\&. This means that after each migration the sticky record will be kept on the node 136 \fIStickyPindown\fRmilliseconds and prevented from being migrated off the node\&. 137 .PP 138 This will improve performance for certain workloads, such as locking\&.tdb if many clients are opening/closing the same file concurrently\&. 139 .SS "KeepaliveInterval" 140 .PP 141 Default: 5 142 .PP 143 How often in seconds should the nodes send keep\-alive packets to each other\&. 144 .SS "KeepaliveLimit" 145 .PP 146 Default: 5 147 .PP 148 After how many keepalive intervals without any traffic should a node wait until marking the peer as DISCONNECTED\&. 149 .PP 150 If a node has hung, it can take 151 \fIKeepaliveInterval\fR 152 * (\fIKeepaliveLimit\fR 153 + 1) seconds before ctdb determines that the node is DISCONNECTED and performs a recovery\&. 
This limit should not be set too high to enable early detection and avoid any application timeouts (e\&.g\&. SMB1) to kick in before the fail over is completed\&. 154 .SS "LCP2PublicIPs" 155 .PP 156 Default: 1 157 .PP 158 When set to 1, ctdb uses the LCP2 ip allocation algorithm\&. 159 .SS "LockProcessesPerDB" 160 .PP 161 Default: 200 162 .PP 163 This is the maximum number of lock helper processes ctdb will create for obtaining record locks\&. When ctdb cannot get a record lock without blocking, it creates a helper process that waits for the lock to be obtained\&. 164 .SS "LogLatencyMs" 165 .PP 166 Default: 0 167 .PP 168 When set to non\-zero, ctdb will log if certains operations take longer than this value, in milliseconds, to complete\&. These operations include "process a record request from client", "take a record or database lock", "update a persistent database record" and "vaccum a database"\&. 169 .SS "MaxQueueDropMsg" 170 .PP 171 Default: 1000000 172 .PP 173 This is the maximum number of messages to be queued up for a client before ctdb will treat the client as hung and will terminate the client connection\&. 174 .SS "MonitorInterval" 175 .PP 176 Default: 15 177 .PP 178 How often should ctdb run the \*(Aqmonitor\*(Aq event in seconds to check for a node\*(Aqs health\&. 179 .SS "MonitorTimeoutCount" 180 .PP 181 Default: 20 182 .PP 183 How many \*(Aqmonitor\*(Aq events in a row need to timeout before a node is flagged as UNHEALTHY\&. This setting is useful if scripts can not be written so that they do not hang for benign reasons\&. 184 .SS "NoIPFailback" 185 .PP 186 Default: 0 187 .PP 188 When set to 1, ctdb will not perform failback of IP addresses when a node becomes healthy\&. When a node becomes UNHEALTHY, ctdb WILL perform failover of public IP addresses, but when the node becomes HEALTHY again, ctdb will not fail the addresses back\&. 189 .PP 190 Use with caution! 
Normally when a node becomes available to the cluster ctdb will try to reassign public IP addresses onto the new node as a way to distribute the workload evenly across the clusternode\&. Ctdb tries to make sure that all running nodes have approximately the same number of public addresses it hosts\&. 191 .PP 192 When you enable this tunable, ctdb will no longer attempt to rebalance the cluster by failing IP addresses back to the new nodes\&. An unbalanced cluster will therefore remain unbalanced until there is manual intervention from the administrator\&. When this parameter is set, you can manually fail public IP addresses over to the new node(s) using the \*(Aqctdb moveip\*(Aq command\&. 193 .SS "NoIPHostOnAllDisabled" 194 .PP 195 Default: 0 196 .PP 197 If no nodes are HEALTHY then by default ctdb will happily host public IPs on disabled (unhealthy or administratively disabled) nodes\&. This can cause problems, for example if the underlying cluster filesystem is not mounted\&. When set to 1 on a node and that node is disabled, any IPs hosted by this node will be released and the node will not takeover any IPs until it is no longer disabled\&. 198 .SS "NoIPTakeover" 199 .PP 200 Default: 0 201 .PP 202 When set to 1, ctdb will not allow IP addresses to be failed over onto this node\&. Any IP addresses that the node currently hosts will remain on the node but no new IP addresses can be failed over to the node\&. 203 .SS "PullDBPreallocation" 204 .PP 205 Default: 10*1024*1024 206 .PP 207 This is the size of a record buffer to pre\-allocate for sending reply to PULLDB control\&. Usually record buffer starts with size of the first record and gets reallocated every time a new record is added to the record buffer\&. For a large number of records, this can be very inefficient to grow the record buffer one record at a time\&. 
208 .SS "RecBufferSizeLimit" 209 .PP 210 Default: 1000000 211 .PP 212 This is the limit on the size of the record buffer to be sent in various controls\&. This limit is used by new controls used for recovery and controls used in vacuuming\&. 213 .SS "RecdFailCount" 214 .PP 215 Default: 10 216 .PP 217 If the recovery daemon has failed to ping the main dameon for this many consecutive intervals, the main daemon will consider the recovery daemon as hung and will try to restart it to recover\&. 218 .SS "RecdPingTimeout" 219 .PP 220 Default: 60 221 .PP 222 If the main dameon has not heard a "ping" from the recovery dameon for this many seconds, the main dameon will log a message that the recovery daemon is potentially hung\&. This also increments a counter which is checked against 223 \fIRecdFailCount\fR 224 for detection of hung recovery daemon\&. 225 .SS "RecLockLatencyMs" 48 226 .PP 49 227 Default: 1000 50 228 .PP 51 Some databases have seqnum tracking enabled, so that samba will be able to detect asynchronously when there has been updates to the database\&. Everytime a database is updated its sequence number is increased\&. 52 .PP 53 This tunable is used to specify in \*(Aqms\*(Aq how frequently ctdb will send out updates to remote nodes to inform them that the sequence number is increased\&. 54 .SS "ControlTimeout" 55 .PP 56 Default: 60 57 .PP 58 This is the default setting for timeout for when sending a control message to either the local or a remote ctdb daemon\&. 59 .SS "TraverseTimeout" 60 .PP 61 Default: 20 62 .PP 63 This setting controls how long we allow a traverse process to run\&. After this timeout triggers, the main ctdb daemon will abort the traverse if it has not yet finished\&. 64 .SS "KeepaliveInterval" 65 .PP 66 Default: 5 67 .PP 68 How often in seconds should the nodes send keepalives to eachother\&. 
69 .SS "KeepaliveLimit" 70 .PP 71 Default: 5 72 .PP 73 After how many keepalive intervals without any traffic should a node wait until marking the peer as DISCONNECTED\&. 74 .PP 75 If a node has hung, it can thus take KeepaliveInterval*(KeepaliveLimit+1) seconds before we determine that the node is DISCONNECTED and that we require a recovery\&. This limitshould not be set too high since we want a hung node to be detectec, and expunged from the cluster well before common CIFS timeouts (45\-90 seconds) kick in\&. 229 When using a reclock file for split brain prevention, if set to non\-zero this tunable will make the recovery dameon log a message if the fcntl() call to lock/testlock the recovery file takes longer than this number of milliseconds\&. 230 .SS "RecoverInterval" 231 .PP 232 Default: 1 233 .PP 234 How frequently in seconds should the recovery daemon perform the consistency checks to determine if it should perform a recovery\&. 235 .SS "RecoverPDBBySeqNum" 236 .PP 237 Default: 1 238 .PP 239 When set to zero, database recovery for persistent databases is record\-by\-record and recovery process simply collects the most recent version of every individual record\&. 240 .PP 241 When set to non\-zero, persistent databases will instead be recovered as a whole db and not by individual records\&. The node that contains the highest value stored in the record "__db_sequence_number__" is selected and the copy of that nodes database is used as the recovered database\&. 242 .PP 243 By default, recovery of persistent databses is done using __db_sequence_number__ record\&. 76 244 .SS "RecoverTimeout" 77 245 .PP 78 Default: 20 246 Default: 120 79 247 .PP 80 248 This is the default setting for timeouts for controls when sent from the recovery daemon\&. We allow longer control timeouts from the recovery daemon than from normal use since the recovery dameon often use controls that can take a lot longer than normal controls\&. 
81 .SS "RecoverInterval" 82 .PP 83 Default: 1 84 .PP 85 How frequently in seconds should the recovery daemon perform the consistency checks that determine if we need to perform a recovery or not\&. 86 .SS "ElectionTimeout" 87 .PP 88 Default: 3 89 .PP 90 When electing a new recovery master, this is how many seconds we allow the election to take before we either deem the election finished or we fail the election and start a new one\&. 91 .SS "TakeoverTimeout" 92 .PP 93 Default: 9 94 .PP 95 This is how many seconds we allow controls to take for IP failover events\&. 96 .SS "MonitorInterval" 97 .PP 98 Default: 15 99 .PP 100 How often should ctdb run the event scripts to check for a nodes health\&. 101 .SS "TickleUpdateInterval" 102 .PP 103 Default: 20 104 .PP 105 How often will ctdb record and store the "tickle" information used to kickstart stalled tcp connections after a recovery\&. 106 .SS "EventScriptTimeout" 107 .PP 108 Default: 30 109 .PP 110 Maximum time in seconds to allow an event to run before timing out\&. This is the total time for all enabled scripts that are run for an event, not just a single event script\&. 111 .PP 112 Note that timeouts are ignored for some events ("takeip", "releaseip", "startrecovery", "recovered") and converted to success\&. The logic here is that the callers of these events implement their own additional timeout\&. 113 .SS "MonitorTimeoutCount" 114 .PP 115 Default: 20 116 .PP 117 How many monitor events in a row need to timeout before a node is flagged as UNHEALTHY\&. This setting is useful if scripts can not be written so that they do not hang for benign reasons\&. 249 .SS "RecoveryBanPeriod" 250 .PP 251 Default: 300 252 .PP 253 The duration in seconds for which a node is banned if the node fails during recovery\&. After this time has elapsed the node will automatically get unbanned and will attempt to rejoin the cluster\&. 254 .PP 255 A node usually gets banned due to real problems with the node\&. 
Don\*(Aqt set this value too small\&. Otherwise, a problematic node will try to re\-join cluster too soon causing unnecessary recoveries\&. 256 .SS "RecoveryDropAllIPs" 257 .PP 258 Default: 120 259 .PP 260 If a node is stuck in recovery, or stopped, or banned, for this many seconds, then ctdb will release all public addresses on that node\&. 118 261 .SS "RecoveryGracePeriod" 119 262 .PP 120 263 Default: 120 121 264 .PP 122 During recoveries, if a node has not caused recovery failures during the last grace period, any records of transgressions that the node has caused recovery failures will be forgiven\&. This resets the ban\-counter back to zero for that node\&. 123 .SS "RecoveryBanPeriod" 124 .PP 125 Default: 300 126 .PP 127 If a node becomes banned causing repetitive recovery failures\&. The node will eventually become banned from the cluster\&. This controls how long the culprit node will be banned from the cluster before it is allowed to try to join the cluster again\&. Don\*(Aqt set to small\&. A node gets banned for a reason and it is usually due to real problems with the node\&. 128 .SS "DatabaseHashSize" 129 .PP 130 Default: 100001 131 .PP 132 Size of the hash chains for the local store of the tdbs that ctdb manages\&. 133 .SS "DatabaseMaxDead" 134 .PP 135 Default: 5 136 .PP 137 How many dead records per hashchain in the TDB database do we allow before the freelist needs to be processed\&. 138 .SS "RerecoveryTimeout" 139 .PP 140 Default: 10 141 .PP 142 Once a recovery has completed, no additional recoveries are permitted until this timeout has expired\&. 143 .SS "EnableBans" 144 .PP 145 Default: 1 146 .PP 147 When set to 0, this disables BANNING completely in the cluster and thus nodes can not get banned, even it they break\&. Don\*(Aqt set to 0 unless you know what you are doing\&. You should set this to the same value on all nodes to avoid unexpected behaviour\&. 
148 .SS "DeterministicIPs" 149 .PP 150 Default: 0 151 .PP 152 When enabled, this tunable makes ctdb try to keep public IP addresses locked to specific nodes as far as possible\&. This makes it easier for debugging since you can know that as long as all nodes are healthy public IP X will always be hosted by node Y\&. 153 .PP 154 The cost of using deterministic IP address assignment is that it disables part of the logic where ctdb tries to reduce the number of public IP assignment changes in the cluster\&. This tunable may increase the number of IP failover/failbacks that are performed on the cluster by a small margin\&. 155 .SS "LCP2PublicIPs" 156 .PP 157 Default: 1 158 .PP 159 When enabled this switches ctdb to use the LCP2 ip allocation algorithm\&. 160 .SS "ReclockPingPeriod" 161 .PP 162 Default: x 163 .PP 164 Obsolete 165 .SS "NoIPFailback" 166 .PP 167 Default: 0 168 .PP 169 When set to 1, ctdb will not perform failback of IP addresses when a node becomes healthy\&. Ctdb WILL perform failover of public IP addresses when a node becomes UNHEALTHY, but when the node becomes HEALTHY again, ctdb will not fail the addresses back\&. 170 .PP 171 Use with caution! Normally when a node becomes available to the cluster ctdb will try to reassign public IP addresses onto the new node as a way to distribute the workload evenly across the clusternode\&. Ctdb tries to make sure that all running nodes have approximately the same number of public addresses it hosts\&. 172 .PP 173 When you enable this tunable, CTDB will no longer attempt to rebalance the cluster by failing IP addresses back to the new nodes\&. An unbalanced cluster will therefore remain unbalanced until there is manual intervention from the administrator\&. When this parameter is set, you can manually fail public IP addresses over to the new node(s) using the \*(Aqctdb moveip\*(Aq command\&. 174 .SS "DisableIPFailover" 175 .PP 176 Default: 0 177 .PP 178 When enabled, ctdb will not perform failover or failback\&. 
Even if a node fails while holding public IPs, ctdb will not recover the IPs or assign them to another node\&. 179 .PP 180 When you enable this tunable, CTDB will no longer attempt to recover the cluster by failing IP addresses over to other nodes\&. This leads to a service outage until the administrator has manually performed failover to replacement nodes using the \*(Aqctdb moveip\*(Aq command\&. 181 .SS "NoIPTakeover" 182 .PP 183 Default: 0 184 .PP 185 When set to 1, ctdb will not allow IP addresses to be failed over onto this node\&. Any IP addresses that the node currently hosts will remain on the node but no new IP addresses can be failed over to the node\&. 186 .SS "NoIPHostOnAllDisabled" 187 .PP 188 Default: 0 189 .PP 190 If no nodes are healthy then by default ctdb will happily host public IPs on disabled (unhealthy or administratively disabled) nodes\&. This can cause problems, for example if the underlying cluster filesystem is not mounted\&. When set to 1 on a node and that node is disabled it, any IPs hosted by this node will be released and the node will not takeover any IPs until it is no longer disabled\&. 191 .SS "DBRecordCountWarn" 192 .PP 193 Default: 100000 194 .PP 195 When set to non\-zero, ctdb will log a warning when we try to recover a database with more than this many records\&. This will produce a warning if a database grows uncontrollably with orphaned records\&. 196 .SS "DBRecordSizeWarn" 197 .PP 198 Default: 10000000 199 .PP 200 When set to non\-zero, ctdb will log a warning when we try to recover a database where a single record is bigger than this\&. This will produce a warning if a database record grows uncontrollably with orphaned sub\-records\&. 201 .SS "DBSizeWarn" 202 .PP 203 Default: 1000000000 204 .PP 205 When set to non\-zero, ctdb will log a warning when we try to recover a database bigger than this\&. This will produce a warning if a database grows uncontrollably\&. 
206 .SS "VerboseMemoryNames" 207 .PP 208 Default: 0 209 .PP 210 This feature consumes additional memory\&. when used the talloc library will create more verbose names for all talloc allocated objects\&. 211 .SS "RecdPingTimeout" 212 .PP 213 Default: 60 214 .PP 215 If the main dameon has not heard a "ping" from the recovery dameon for this many seconds, the main dameon will log a message that the recovery daemon is potentially hung\&. 216 .SS "RecdFailCount" 217 .PP 218 Default: 10 219 .PP 220 If the recovery daemon has failed to ping the main dameon for this many consecutive intervals, the main daemon will consider the recovery daemon as hung and will try to restart it to recover\&. 221 .SS "LogLatencyMs" 222 .PP 223 Default: 0 224 .PP 225 When set to non\-zero, this will make the main daemon log any operation that took longer than this value, in \*(Aqms\*(Aq, to complete\&. These include "how long time a lockwait child process needed", "how long time to write to a persistent database" but also "how long did it take to get a response to a CALL from a remote node"\&. 226 .SS "RecLockLatencyMs" 227 .PP 228 Default: 1000 229 .PP 230 When using a reclock file for split brain prevention, if set to non\-zero this tunable will make the recovery dameon log a message if the fcntl() call to lock/testlock the recovery file takes longer than this number of ms\&. 231 .SS "RecoveryDropAllIPs" 232 .PP 233 Default: 120 234 .PP 235 If we have been stuck in recovery, or stopped, or banned, mode for this many seconds we will force drop all held public addresses\&. 236 .SS "VacuumInterval" 237 .PP 238 Default: 10 239 .PP 240 Periodic interval in seconds when vacuuming is triggered for volatile databases\&. 241 .SS "VacuumMaxRunTime" 242 .PP 243 Default: 120 244 .PP 245 The maximum time in seconds for which the vacuuming process is allowed to run\&. If vacuuming process takes longer than this value, then the vacuuming process is terminated\&. 
265 During recoveries, if a node has not caused recovery failures during the last grace period in seconds, any records of transgressions that the node has caused recovery failures will be forgiven\&. This resets the ban\-counter back to zero for that node\&. 246 266 .SS "RepackLimit" 247 267 .PP … … 249 269 .PP 250 270 During vacuuming, if the number of freelist records are more than 251 \fIRepackLimit\fR, then databases arerepacked to get rid of the freelist records to avoid fragmentation\&.271 \fIRepackLimit\fR, then the database is repacked to get rid of the freelist records to avoid fragmentation\&. 252 272 .PP 253 273 Databases are repacked only if both … … 256 276 \fIVacuumLimit\fR 257 277 are exceeded\&. 278 .SS "RerecoveryTimeout" 279 .PP 280 Default: 10 281 .PP 282 Once a recovery has completed, no additional recoveries are permitted until this timeout in seconds has expired\&. 283 .SS "Samba3AvoidDeadlocks" 284 .PP 285 Default: 0 286 .PP 287 If set to non\-zero, enable code that prevents deadlocks with Samba (only for Samba 3\&.x)\&. 288 .PP 289 This should be set to 1 only when using Samba version 3\&.x to enable special code in ctdb to avoid deadlock with Samba version 3\&.x\&. This code is not required for Samba version 4\&.x and must not be enabled for Samba 4\&.x\&. 290 .SS "SeqnumInterval" 291 .PP 292 Default: 1000 293 .PP 294 Some databases have seqnum tracking enabled, so that samba will be able to detect asynchronously when there has been updates to the database\&. Everytime a database is updated its sequence number is increased\&. 295 .PP 296 This tunable is used to specify in milliseconds how frequently ctdb will send out updates to remote nodes to inform them that the sequence number is increased\&. 297 .SS "StatHistoryInterval" 298 .PP 299 Default: 1 300 .PP 301 Granularity of the statistics collected in the statistics history\&. This is reported by \*(Aqctdb stats\*(Aq command\&. 
302 .SS "StickyDuration" 303 .PP 304 Default: 600 305 .PP 306 Once a record has been marked STICKY, this is the duration in seconds, the record will be flagged as a STICKY record\&. 307 .SS "StickyPindown" 308 .PP 309 Default: 200 310 .PP 311 Once a STICKY record has been migrated onto a node, it will be pinned down on that node for this number of milliseconds\&. Any request from other nodes to migrate the record off the node will be deferred\&. 312 .SS "TakeoverTimeout" 313 .PP 314 Default: 9 315 .PP 316 This is the duration in seconds in which ctdb tries to complete IP failover\&. 317 .SS "TDBMutexEnabled" 318 .PP 319 Default: 0 320 .PP 321 This parameter enables TDB_MUTEX_LOCKING feature on volatile databases if the robust mutexes are supported\&. This optimizes the record locking using robust mutexes and is much more efficient than using posix locks\&. 322 .SS "TickleUpdateInterval" 323 .PP 324 Default: 20 325 .PP 326 Every 327 \fITickleUpdateInterval\fR 328 seconds, ctdb synchronizes the client connection information across nodes\&. 329 .SS "TraverseTimeout" 330 .PP 331 Default: 20 332 .PP 333 This is the duration in seconds for which a database traverse is allowed to run\&. If the traverse does not complete during this interval, ctdb will abort the traverse\&. 334 .SS "VacuumFastPathCount" 335 .PP 336 Default: 60 337 .PP 338 During a vacuuming run, ctdb usually processes only the records marked for deletion also called the fast path vacuuming\&. After finishing 339 \fIVacuumFastPathCount\fR 340 number of fast path vacuuming runs, ctdb will trigger a scan of complete database for any empty records that need to be deleted\&. 341 .SS "VacuumInterval" 342 .PP 343 Default: 10 344 .PP 345 Periodic interval in seconds when vacuuming is triggered for volatile databases\&. 258 346 .SS "VacuumLimit" 259 347 .PP … … 268 356 \fIVacuumLimit\fR 269 357 are exceeded\&. 
270 .SS "VacuumFastPathCount" 271 .PP 272 Default: 60 273 .PP 274 When a record is deleted, it is marked for deletion during vacuuming\&. Vacuuming process usually processes this list to purge the records from the database\&. If the number of records marked for deletion are more than VacuumFastPathCount, then vacuuming process will scan the complete database for empty records instead of using the list of records marked for deletion\&. 275 .SS "DeferredAttachTO" 276 .PP 277 Default: 120 278 .PP 279 When databases are frozen we do not allow clients to attach to the databases\&. Instead of returning an error immediately to the application the attach request from the client is deferred until the database becomes available again at which stage we respond to the client\&. 280 .PP 281 This timeout controls how long we will defer the request from the client before timing it out and returning an error to the client\&. 282 .SS "HopcountMakeSticky" 283 .PP 284 Default: 50 285 .PP 286 If the database is set to \*(AqSTICKY\*(Aq mode, using the \*(Aqctdb setdbsticky\*(Aq command, any record that is seen as very hot and migrating so fast that hopcount surpasses 50 is set to become a STICKY record for StickyDuration seconds\&. This means that after each migration the record will be kept on the node and prevented from being migrated off the node\&. 287 .PP 288 This setting allows one to try to identify such records and stop them from migrating across the cluster so fast\&. This will improve performance for certain workloads, such as locking\&.tdb if many clients are opening/closing the same file concurrently\&. 289 .SS "StickyDuration" 290 .PP 291 Default: 600 292 .PP 293 Once a record has been found to be fetch\-lock hot and has been flagged to become STICKY, this is for how long, in seconds, the record will be flagged as a STICKY record\&. 
294 .SS "StickyPindown" 295 .PP 296 Default: 200 297 .PP 298 Once a STICKY record has been migrated onto a node, it will be pinned down on that node for this number of ms\&. Any request from other nodes to migrate the record off the node will be deferred until the pindown timer expires\&. 299 .SS "StatHistoryInterval" 300 .PP 301 Default: 1 302 .PP 303 Granularity of the statistics collected in the statistics history\&. 304 .SS "AllowClientDBAttach" 305 .PP 306 Default: 1 307 .PP 308 When set to 0, clients are not allowed to attach to any databases\&. This can be used to temporarily block any new processes from attaching to and accessing the databases\&. 309 .SS "RecoverPDBBySeqNum" 310 .PP 311 Default: 1 312 .PP 313 When set to zero, database recovery for persistent databases is record\-by\-record and recovery process simply collects the most recent version of every individual record\&. 314 .PP 315 When set to non\-zero, persistent databases will instead be recovered as a whole db and not by individual records\&. The node that contains the highest value stored in the record "__db_sequence_number__" is selected and the copy of that nodes database is used as the recovered database\&. 316 .PP 317 By default, recovery of persistent databses is done using __db_sequence_number__ record\&. 318 .SS "FetchCollapse" 319 .PP 320 Default: 1 321 .PP 322 When many clients across many nodes try to access the same record at the same time this can lead to a fetch storm where the record becomes very active and bounces between nodes very fast\&. This leads to high CPU utilization of the ctdbd daemon, trying to bounce that record around very fast, and poor performance\&. 323 .PP 324 This parameter is used to activate a fetch\-collapse\&. A fetch\-collapse is when we track which records we have requests in flight so that we only keep one request in flight from a certain node, even if multiple smbd processes are attemtping to fetch the record at the same time\&. 
This can improve performance and reduce CPU utilization for certain workloads\&. 325 .PP 326 This timeout controls if we should collapse multiple fetch operations of the same record into a single request and defer all duplicates or not\&. 327 .SS "Samba3AvoidDeadlocks" 328 .PP 329 Default: 0 330 .PP 331 Enable code that prevents deadlocks with Samba (only for Samba 3\&.x)\&. 332 .PP 333 This should be set to 1 when using Samba version 3\&.x to enable special code in CTDB to avoid deadlock with Samba version 3\&.x\&. This code is not required for Samba version 4\&.x and must not be enabled for Samba 4\&.x\&. 358 .SS "VacuumMaxRunTime" 359 .PP 360 Default: 120 361 .PP 362 The maximum time in seconds for which the vacuuming process is allowed to run\&. If vacuuming process takes longer than this value, then the vacuuming process is terminated\&. 363 .SS "VerboseMemoryNames" 364 .PP 365 Default: 0 366 .PP 367 When set to non\-zero, ctdb assigns verbose names for some of the talloc allocated memory objects\&. These names are visible in the talloc memory report generated by \*(Aqctdb dumpmemory\*(Aq\&. 334 368 .SH "SEE ALSO" 335 369 .PP 
- 
      vendor/current/ctdb/doc/ctdb-tunables.7.htmlr988 r989 1 <html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>ctdb-tunables</title><meta name="generator" content="DocBook XSL Stylesheets V1.78.1"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="refentry"><a name="ctdb-tunables.7"></a><div class="titlepage"></div><div class="refnamediv"><h2>Name</h2><p>ctdb-tunables — CTDB tunable configuration variables</p></div><div class="refsect1"><a name="idp5 2032112"></a><h2>DESCRIPTION</h2><p>1 <html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>ctdb-tunables</title><meta name="generator" content="DocBook XSL Stylesheets V1.78.1"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="refentry"><a name="ctdb-tunables.7"></a><div class="titlepage"></div><div class="refnamediv"><h2>Name</h2><p>ctdb-tunables — CTDB tunable configuration variables</p></div><div class="refsect1"><a name="idp51068080"></a><h2>DESCRIPTION</h2><p> 2 2 CTDB's behaviour can be configured by setting run-time tunable 3 3 variables. This lists and describes all tunables. See the … … 5 5 <span class="command"><strong>listvars</strong></span>, <span class="command"><strong>setvar</strong></span> and 6 6 <span class="command"><strong>getvar</strong></span> commands for more details. 7 </p><div class="refsect2"><a name="idp52844128"></a><h3>MaxRedirectCount</h3><p>Default: 3</p><p> 8 If we are not the DMASTER and need to fetch a record across the network 9 we first send the request to the LMASTER after which the record 10 is passed onto the current DMASTER. If the DMASTER changes before 11 the request has reached that node, the request will be passed onto the 12 "next" DMASTER. 
For very hot records that migrate rapidly across the 13 cluster this can cause a request to "chase" the record for many hops 14 before it catches up with the record. 15 16 this is how many hops we allow trying to chase the DMASTER before we 17 switch back to the LMASTER again to ask for new directions. 18 </p><p> 19 When chasing a record, this is how many hops we will chase the record 20 for before going back to the LMASTER to ask for new guidance. 21 </p></div><div class="refsect2"><a name="idp52639696"></a><h3>SeqnumInterval</h3><p>Default: 1000</p><p> 22 Some databases have seqnum tracking enabled, so that samba will be able 23 to detect asynchronously when there has been updates to the database. 24 Everytime a database is updated its sequence number is increased. 25 </p><p> 26 This tunable is used to specify in 'ms' how frequently ctdb will 27 send out updates to remote nodes to inform them that the sequence 28 number is increased. 29 </p></div><div class="refsect2"><a name="idp52023488"></a><h3>ControlTimeout</h3><p>Default: 60</p><p> 30 This is the default 31 setting for timeout for when sending a control message to either the 32 local or a remote ctdb daemon. 33 </p></div><div class="refsect2"><a name="idp51243376"></a><h3>TraverseTimeout</h3><p>Default: 20</p><p> 34 This setting controls how long we allow a traverse process to run. 35 After this timeout triggers, the main ctdb daemon will abort the 36 traverse if it has not yet finished. 37 </p></div><div class="refsect2"><a name="idp50157008"></a><h3>KeepaliveInterval</h3><p>Default: 5</p><p> 38 How often in seconds should the nodes send keepalives to eachother. 39 </p></div><div class="refsect2"><a name="idp49234000"></a><h3>KeepaliveLimit</h3><p>Default: 5</p><p> 40 After how many keepalive intervals without any traffic should a node 41 wait until marking the peer as DISCONNECTED. 
42 </p><p> 43 If a node has hung, it can thus take KeepaliveInterval*(KeepaliveLimit+1) 44 seconds before we determine that the node is DISCONNECTED and that we 45 require a recovery. This limitshould not be set too high since we want 46 a hung node to be detectec, and expunged from the cluster well before 47 common CIFS timeouts (45-90 seconds) kick in. 48 </p></div><div class="refsect2"><a name="idp53887184"></a><h3>RecoverTimeout</h3><p>Default: 20</p><p> 49 This is the default setting for timeouts for controls when sent from the 50 recovery daemon. We allow longer control timeouts from the recovery daemon 51 than from normal use since the recovery dameon often use controls that 52 can take a lot longer than normal controls. 53 </p></div><div class="refsect2"><a name="idp53889072"></a><h3>RecoverInterval</h3><p>Default: 1</p><p> 54 How frequently in seconds should the recovery daemon perform the 55 consistency checks that determine if we need to perform a recovery or not. 56 </p></div><div class="refsect2"><a name="idp53890832"></a><h3>ElectionTimeout</h3><p>Default: 3</p><p> 57 When electing a new recovery master, this is how many seconds we allow 58 the election to take before we either deem the election finished 59 or we fail the election and start a new one. 60 </p></div><div class="refsect2"><a name="idp53892640"></a><h3>TakeoverTimeout</h3><p>Default: 9</p><p> 61 This is how many seconds we allow controls to take for IP failover events. 62 </p></div><div class="refsect2"><a name="idp53894240"></a><h3>MonitorInterval</h3><p>Default: 15</p><p> 63 How often should ctdb run the event scripts to check for a nodes health. 64 </p></div><div class="refsect2"><a name="idp53895840"></a><h3>TickleUpdateInterval</h3><p>Default: 20</p><p> 65 How often will ctdb record and store the "tickle" information used to 66 kickstart stalled tcp connections after a recovery. 
67 </p></div><div class="refsect2"><a name="idp53897584"></a><h3>EventScriptTimeout</h3><p>Default: 30</p><p> 7 </p><p> 8 The tunable variables are listed alphabetically. 9 </p><div class="refsect2"><a name="idp51120048"></a><h3>AllowClientDBAttach</h3><p>Default: 1</p><p> 10 When set to 0, clients are not allowed to attach to any databases. 11 This can be used to temporarily block any new processes from 12 attaching to and accessing the databases. This is mainly used 13 for detaching a volatile database using 'ctdb detach'. 14 </p></div><div class="refsect2"><a name="idp53889776"></a><h3>AllowUnhealthyDBRead</h3><p>Default: 0</p><p> 15 When set to 1, ctdb allows database traverses to read unhealthy 16 databases. By default, ctdb does not allow reading records from 17 unhealthy databases. 18 </p></div><div class="refsect2"><a name="idp54131312"></a><h3>ControlTimeout</h3><p>Default: 60</p><p> 19 This is the default setting for timeout for when sending a 20 control message to either the local or a remote ctdb daemon. 21 </p></div><div class="refsect2"><a name="idp51364816"></a><h3>DatabaseHashSize</h3><p>Default: 100001</p><p> 22 Number of the hash chains for the local store of the tdbs that 23 ctdb manages. 24 </p></div><div class="refsect2"><a name="idp53157488"></a><h3>DatabaseMaxDead</h3><p>Default: 5</p><p> 25 Maximum number of dead records per hash chain for the tdb databses 26 managed by ctdb. 27 </p></div><div class="refsect2"><a name="idp50010288"></a><h3>DBRecordCountWarn</h3><p>Default: 100000</p><p> 28 When set to non-zero, ctdb will log a warning during recovery if 29 a database has more than this many records. This will produce a 30 warning if a database grows uncontrollably with orphaned records. 31 </p></div><div class="refsect2"><a name="idp49085760"></a><h3>DBRecordSizeWarn</h3><p>Default: 10000000</p><p> 32 When set to non-zero, ctdb will log a warning during recovery 33 if a single record is bigger than this size. 
This will produce 34 a warning if a database record grows uncontrollably. 35 </p></div><div class="refsect2"><a name="idp49087568"></a><h3>DBSizeWarn</h3><p>Default: 1000000000</p><p> 36 When set to non-zero, ctdb will log a warning during recovery if 37 a database size is bigger than this. This will produce a warning 38 if a database grows uncontrollably. 39 </p></div><div class="refsect2"><a name="idp49089360"></a><h3>DeferredAttachTO</h3><p>Default: 120</p><p> 40 When databases are frozen we do not allow clients to attach to 41 the databases. Instead of returning an error immediately to the 42 client, the attach request from the client is deferred until 43 the database becomes available again at which stage we respond 44 to the client. 45 </p><p> 46 This timeout controls how long we will defer the request from the 47 client before timing it out and returning an error to the client. 48 </p></div><div class="refsect2"><a name="idp54043296"></a><h3>DeterministicIPs</h3><p>Default: 0</p><p> 49 When set to 1, ctdb will try to keep public IP addresses locked 50 to specific nodes as far as possible. This makes it easier 51 for debugging since you can know that as long as all nodes are 52 healthy public IP X will always be hosted by node Y. 53 </p><p> 54 The cost of using deterministic IP address assignment is that it 55 disables part of the logic where ctdb tries to reduce the number 56 of public IP assignment changes in the cluster. This tunable may 57 increase the number of IP failover/failbacks that are performed 58 on the cluster by a small margin. 59 </p></div><div class="refsect2"><a name="idp54045872"></a><h3>DisableIPFailover</h3><p>Default: 0</p><p> 60 When set to non-zero, ctdb will not perform failover or 61 failback. Even if a node fails while holding public IPs, ctdb 62 will not recover the IPs or assign them to another node. 
63 </p><p> 64 When this tunable is enabled, ctdb will no longer attempt 65 to recover the cluster by failing IP addresses over to other 66 nodes. This leads to a service outage until the administrator 67 has manually performed IP failover to replacement nodes using the 68 'ctdb moveip' command. 69 </p></div><div class="refsect2"><a name="idp54048368"></a><h3>ElectionTimeout</h3><p>Default: 3</p><p> 70 The number of seconds to wait for the election of recovery 71 master to complete. If the election is not completed during this 72 interval, then that round of election fails and ctdb starts a 73 new election. 74 </p></div><div class="refsect2"><a name="idp54050192"></a><h3>EnableBans</h3><p>Default: 1</p><p> 75 This parameter allows ctdb to ban a node if the node is misbehaving. 76 </p><p> 77 When set to 0, this disables banning completely in the cluster 78 and thus nodes can not get banned, even if they break. Don't 79 set to 0 unless you know what you are doing. You should set 80 this to the same value on all nodes to avoid unexpected behaviour. 81 </p></div><div class="refsect2"><a name="idp54052448"></a><h3>EventScriptTimeout</h3><p>Default: 30</p><p> 68 82 Maximum time in seconds to allow an event to run before timing 69 83 out. This is the total time for all enabled scripts that are … … 74 88 success. The logic here is that the callers of these events 75 89 implement their own additional timeout. 76 </p></div><div class="refsect2"><a name="idp53900064"></a><h3>MonitorTimeoutCount</h3><p>Default: 20</p><p> 77 How many monitor events in a row need to timeout before a node 78 is flagged as UNHEALTHY. This setting is useful if scripts 79 can not be written so that they do not hang for benign 80 reasons. 
81 </p></div><div class="refsect2"><a name="idp53901872"></a><h3>RecoveryGracePeriod</h3><p>Default: 120</p><p> 82 During recoveries, if a node has not caused recovery failures during the 83 last grace period, any records of transgressions that the node has caused 84 recovery failures will be forgiven. This resets the ban-counter back to 85 zero for that node. 86 </p></div><div class="refsect2"><a name="idp49113200"></a><h3>RecoveryBanPeriod</h3><p>Default: 300</p><p> 87 If a node becomes banned causing repetitive recovery failures. The node will 88 eventually become banned from the cluster. 89 This controls how long the culprit node will be banned from the cluster 90 before it is allowed to try to join the cluster again. 91 Don't set to small. A node gets banned for a reason and it is usually due 92 to real problems with the node. 93 </p></div><div class="refsect2"><a name="idp49115184"></a><h3>DatabaseHashSize</h3><p>Default: 100001</p><p> 94 Size of the hash chains for the local store of the tdbs that ctdb manages. 95 </p></div><div class="refsect2"><a name="idp49116784"></a><h3>DatabaseMaxDead</h3><p>Default: 5</p><p> 96 How many dead records per hashchain in the TDB database do we allow before 97 the freelist needs to be processed. 98 </p></div><div class="refsect2"><a name="idp49118528"></a><h3>RerecoveryTimeout</h3><p>Default: 10</p><p> 99 Once a recovery has completed, no additional recoveries are permitted 100 until this timeout has expired. 101 </p></div><div class="refsect2"><a name="idp49120256"></a><h3>EnableBans</h3><p>Default: 1</p><p> 102 When set to 0, this disables BANNING completely in the cluster and thus 103 nodes can not get banned, even it they break. Don't set to 0 unless you 104 know what you are doing. You should set this to the same value on 105 all nodes to avoid unexpected behaviour. 
106 </p></div><div class="refsect2"><a name="idp49122128"></a><h3>DeterministicIPs</h3><p>Default: 0</p><p> 107 When enabled, this tunable makes ctdb try to keep public IP addresses 108 locked to specific nodes as far as possible. This makes it easier for 109 debugging since you can know that as long as all nodes are healthy 110 public IP X will always be hosted by node Y. 111 </p><p> 112 The cost of using deterministic IP address assignment is that it 113 disables part of the logic where ctdb tries to reduce the number of 114 public IP assignment changes in the cluster. This tunable may increase 115 the number of IP failover/failbacks that are performed on the cluster 116 by a small margin. 117 </p></div><div class="refsect2"><a name="idp49124720"></a><h3>LCP2PublicIPs</h3><p>Default: 1</p><p> 118 When enabled this switches ctdb to use the LCP2 ip allocation 119 algorithm. 120 </p></div><div class="refsect2"><a name="idp49126320"></a><h3>ReclockPingPeriod</h3><p>Default: x</p><p> 121 Obsolete 122 </p></div><div class="refsect2"><a name="idp49127952"></a><h3>NoIPFailback</h3><p>Default: 0</p><p> 123 When set to 1, ctdb will not perform failback of IP addresses when a node 124 becomes healthy. Ctdb WILL perform failover of public IP addresses when a 125 node becomes UNHEALTHY, but when the node becomes HEALTHY again, ctdb 126 will not fail the addresses back. 127 </p><p> 128 Use with caution! Normally when a node becomes available to the cluster 129 ctdb will try to reassign public IP addresses onto the new node as a way 130 to distribute the workload evenly across the clusternode. Ctdb tries to 131 make sure that all running nodes have approximately the same number of 132 public addresses it hosts. 133 </p><p> 134 When you enable this tunable, CTDB will no longer attempt to rebalance 135 the cluster by failing IP addresses back to the new nodes. An unbalanced 136 cluster will therefore remain unbalanced until there is manual 137 intervention from the administrator. 
When this parameter is set, you can 138 manually fail public IP addresses over to the new node(s) using the 139 'ctdb moveip' command. 140 </p></div><div class="refsect2"><a name="idp49136144"></a><h3>DisableIPFailover</h3><p>Default: 0</p><p> 141 When enabled, ctdb will not perform failover or failback. Even if a 142 node fails while holding public IPs, ctdb will not recover the IPs or 143 assign them to another node. 144 </p><p> 145 When you enable this tunable, CTDB will no longer attempt to recover 146 the cluster by failing IP addresses over to other nodes. This leads to 147 a service outage until the administrator has manually performed failover 148 to replacement nodes using the 'ctdb moveip' command. 149 </p></div><div class="refsect2"><a name="idp49138608"></a><h3>NoIPTakeover</h3><p>Default: 0</p><p> 150 When set to 1, ctdb will not allow IP addresses to be failed over 151 onto this node. Any IP addresses that the node currently hosts 152 will remain on the node but no new IP addresses can be failed over 153 to the node. 154 </p></div><div class="refsect2"><a name="idp49140448"></a><h3>NoIPHostOnAllDisabled</h3><p>Default: 0</p><p> 155 If no nodes are healthy then by default ctdb will happily host 90 </p></div><div class="refsect2"><a name="idp54054880"></a><h3>FetchCollapse</h3><p>Default: 1</p><p> 91 This parameter is used to avoid multiple migration requests for 92 the same record from a single node. All the record requests for 93 the same record are queued up and processed when the record is 94 migrated to the current node. 95 </p><p> 96 When many clients across many nodes try to access the same record 97 at the same time this can lead to a fetch storm where the record 98 becomes very active and bounces between nodes very fast. This 99 leads to high CPU utilization of the ctdbd daemon, trying to 100 bounce that record around very fast, and poor performance. 101 This can improve performance and reduce CPU utilization for 102 certain workloads. 
103 </p></div><div class="refsect2"><a name="idp48966640"></a><h3>HopcountMakeSticky</h3><p>Default: 50</p><p> 104 For database(s) marked STICKY (using 'ctdb setdbsticky'), 105 any record that is migrating so fast that hopcount 106 exceeds this limit is marked as STICKY record for 107 <code class="varname">StickyDuration</code> seconds. This means that 108 after each migration the sticky record will be kept on the node 109 <code class="varname">StickyPindown</code>milliseconds and prevented from 110 being migrated off the node. 111 </p><p> 112 This will improve performance for certain workloads, such as 113 locking.tdb if many clients are opening/closing the same file 114 concurrently. 115 </p></div><div class="refsect2"><a name="idp48969952"></a><h3>KeepaliveInterval</h3><p>Default: 5</p><p> 116 How often in seconds should the nodes send keep-alive packets to 117 each other. 118 </p></div><div class="refsect2"><a name="idp48971552"></a><h3>KeepaliveLimit</h3><p>Default: 5</p><p> 119 After how many keepalive intervals without any traffic should 120 a node wait until marking the peer as DISCONNECTED. 121 </p><p> 122 If a node has hung, it can take 123 <code class="varname">KeepaliveInterval</code> * 124 (<code class="varname">KeepaliveLimit</code> + 1) seconds before 125 ctdb determines that the node is DISCONNECTED and performs 126 a recovery. This limit should not be set too high to enable 127 early detection and avoid any application timeouts (e.g. SMB1) 128 to kick in before the fail over is completed. 129 </p></div><div class="refsect2"><a name="idp48974864"></a><h3>LCP2PublicIPs</h3><p>Default: 1</p><p> 130 When set to 1, ctdb uses the LCP2 ip allocation algorithm. 131 </p></div><div class="refsect2"><a name="idp48976464"></a><h3>LockProcessesPerDB</h3><p>Default: 200</p><p> 132 This is the maximum number of lock helper processes ctdb will 133 create for obtaining record locks. 
When ctdb cannot get a record 134 lock without blocking, it creates a helper process that waits 135 for the lock to be obtained. 136 </p></div><div class="refsect2"><a name="idp48978304"></a><h3>LogLatencyMs</h3><p>Default: 0</p><p> 137 When set to non-zero, ctdb will log if certain operations 138 take longer than this value, in milliseconds, to complete. 139 These operations include "process a record request from client", 140 "take a record or database lock", "update a persistent database 141 record" and "vacuum a database". 142 </p></div><div class="refsect2"><a name="idp48980208"></a><h3>MaxQueueDropMsg</h3><p>Default: 1000000</p><p> 143 This is the maximum number of messages to be queued up for 144 a client before ctdb will treat the client as hung and will 145 terminate the client connection. 146 </p></div><div class="refsect2"><a name="idp48981984"></a><h3>MonitorInterval</h3><p>Default: 15</p><p> 147 How often should ctdb run the 'monitor' event in seconds to check 148 for a node's health. 149 </p></div><div class="refsect2"><a name="idp48988480"></a><h3>MonitorTimeoutCount</h3><p>Default: 20</p><p> 150 How many 'monitor' events in a row need to timeout before a node 151 is flagged as UNHEALTHY. This setting is useful if scripts can 152 not be written so that they do not hang for benign reasons. 153 </p></div><div class="refsect2"><a name="idp48990288"></a><h3>NoIPFailback</h3><p>Default: 0</p><p> 154 When set to 1, ctdb will not perform failback of IP addresses 155 when a node becomes healthy. When a node becomes UNHEALTHY, 156 ctdb WILL perform failover of public IP addresses, but when the 157 node becomes HEALTHY again, ctdb will not fail the addresses back. 158 </p><p> 159 Use with caution! Normally when a node becomes available to the 160 cluster ctdb will try to reassign public IP addresses onto the 161 new node as a way to distribute the workload evenly across the 162 clusternode. 
Ctdb tries to make sure that all running nodes have 163 approximately the same number of public addresses it hosts. 164 </p><p> 165 When you enable this tunable, ctdb will no longer attempt to 166 rebalance the cluster by failing IP addresses back to the new 167 nodes. An unbalanced cluster will therefore remain unbalanced 168 until there is manual intervention from the administrator. When 169 this parameter is set, you can manually fail public IP addresses 170 over to the new node(s) using the 'ctdb moveip' command. 171 </p></div><div class="refsect2"><a name="idp48993680"></a><h3>NoIPHostOnAllDisabled</h3><p>Default: 0</p><p> 172 If no nodes are HEALTHY then by default ctdb will happily host 156 173 public IPs on disabled (unhealthy or administratively disabled) 157 nodes. 174 nodes. This can cause problems, for example if the underlying 158 175 cluster filesystem is not mounted. When set to 1 on a node and 159 that node is disabled it, any IPs hosted by this node will be176 that node is disabled, any IPs hosted by this node will be 160 177 released and the node will not takeover any IPs until it is no 161 178 longer disabled. 162 </p></div><div class="refsect2"><a name="idp49142480"></a><h3>DBRecordCountWarn</h3><p>Default: 100000</p><p> 163 When set to non-zero, ctdb will log a warning when we try to recover a 164 database with more than this many records. This will produce a warning 165 if a database grows uncontrollably with orphaned records. 166 </p></div><div class="refsect2"><a name="idp49144304"></a><h3>DBRecordSizeWarn</h3><p>Default: 10000000</p><p> 167 When set to non-zero, ctdb will log a warning when we try to recover a 168 database where a single record is bigger than this. This will produce 169 a warning if a database record grows uncontrollably with orphaned 170 sub-records. 
171 </p></div><div class="refsect2"><a name="idp49146144"></a><h3>DBSizeWarn</h3><p>Default: 1000000000</p><p> 172 When set to non-zero, ctdb will log a warning when we try to recover a 173 database bigger than this. This will produce 174 a warning if a database grows uncontrollably. 175 </p></div><div class="refsect2"><a name="idp49147936"></a><h3>VerboseMemoryNames</h3><p>Default: 0</p><p> 176 This feature consumes additional memory. when used the talloc library 177 will create more verbose names for all talloc allocated objects. 178 </p></div><div class="refsect2"><a name="idp49149696"></a><h3>RecdPingTimeout</h3><p>Default: 60</p><p> 179 If the main dameon has not heard a "ping" from the recovery dameon for 180 this many seconds, the main dameon will log a message that the recovery 181 daemon is potentially hung. 182 </p></div><div class="refsect2"><a name="idp49151488"></a><h3>RecdFailCount</h3><p>Default: 10</p><p> 183 If the recovery daemon has failed to ping the main dameon for this many 184 consecutive intervals, the main daemon will consider the recovery daemon 185 as hung and will try to restart it to recover. 186 </p></div><div class="refsect2"><a name="idp49153312"></a><h3>LogLatencyMs</h3><p>Default: 0</p><p> 187 When set to non-zero, this will make the main daemon log any operation that 188 took longer than this value, in 'ms', to complete. 189 These include "how long time a lockwait child process needed", 190 "how long time to write to a persistent database" but also 191 "how long did it take to get a response to a CALL from a remote node". 192 </p></div><div class="refsect2"><a name="idp49155264"></a><h3>RecLockLatencyMs</h3><p>Default: 1000</p><p> 193 When using a reclock file for split brain prevention, if set to non-zero 194 this tunable will make the recovery dameon log a message if the fcntl() 195 call to lock/testlock the recovery file takes longer than this number of 196 ms. 
197 </p></div><div class="refsect2"><a name="idp49157120"></a><h3>RecoveryDropAllIPs</h3><p>Default: 120</p><p> 198 If we have been stuck in recovery, or stopped, or banned, mode for 199 this many seconds we will force drop all held public addresses. 200 </p></div><div class="refsect2"><a name="idp55021168"></a><h3>VacuumInterval</h3><p>Default: 10</p><p> 179 </p></div><div class="refsect2"><a name="idp48995696"></a><h3>NoIPTakeover</h3><p>Default: 0</p><p> 180 When set to 1, ctdb will not allow IP addresses to be failed 181 over onto this node. Any IP addresses that the node currently 182 hosts will remain on the node but no new IP addresses can be 183 failed over to the node. 184 </p></div><div class="refsect2"><a name="idp48997536"></a><h3>PullDBPreallocation</h3><p>Default: 10*1024*1024</p><p> 185 This is the size of a record buffer to pre-allocate for sending 186 reply to PULLDB control. Usually record buffer starts with size 187 of the first record and gets reallocated every time a new record 188 is added to the record buffer. For a large number of records, 189 this can be very inefficient to grow the record buffer one record 190 at a time. 191 </p></div><div class="refsect2"><a name="idp48999504"></a><h3>RecBufferSizeLimit</h3><p>Default: 1000000</p><p> 192 This is the limit on the size of the record buffer to be sent 193 in various controls. This limit is used by new controls used 194 for recovery and controls used in vacuuming. 195 </p></div><div class="refsect2"><a name="idp49001328"></a><h3>RecdFailCount</h3><p>Default: 10</p><p> 196 If the recovery daemon has failed to ping the main dameon for 197 this many consecutive intervals, the main daemon will consider 198 the recovery daemon as hung and will try to restart it to recover. 
199 </p></div><div class="refsect2"><a name="idp49003152"></a><h3>RecdPingTimeout</h3><p>Default: 60</p><p> 200 If the main daemon has not heard a "ping" from the recovery daemon 201 for this many seconds, the main daemon will log a message that 202 the recovery daemon is potentially hung. This also increments a 203 counter which is checked against <code class="varname">RecdFailCount</code> 204 for detection of hung recovery daemon. 205 </p></div><div class="refsect2"><a name="idp49005424"></a><h3>RecLockLatencyMs</h3><p>Default: 1000</p><p> 206 When using a reclock file for split brain prevention, if set 207 to non-zero this tunable will make the recovery daemon log a 208 message if the fcntl() call to lock/testlock the recovery file 209 takes longer than this number of milliseconds. 210 </p></div><div class="refsect2"><a name="idp49007280"></a><h3>RecoverInterval</h3><p>Default: 1</p><p> 211 How frequently in seconds should the recovery daemon perform the 212 consistency checks to determine if it should perform a recovery. 213 </p></div><div class="refsect2"><a name="idp49009040"></a><h3>RecoverPDBBySeqNum</h3><p>Default: 1</p><p> 214 When set to zero, database recovery for persistent databases is 215 record-by-record and recovery process simply collects the most 216 recent version of every individual record. 217 </p><p> 218 When set to non-zero, persistent databases will instead be 219 recovered as a whole db and not by individual records. The 220 node that contains the highest value stored in the record 221 "__db_sequence_number__" is selected and the copy of that node's 222 database is used as the recovered database. 223 </p><p> 224 By default, recovery of persistent databases is done using 225 __db_sequence_number__ record. 226 </p></div><div class="refsect2"><a name="idp54874960"></a><h3>RecoverTimeout</h3><p>Default: 120</p><p> 227 This is the default setting for timeouts for controls when sent 228 from the recovery daemon. 
We allow longer control timeouts from 229 the recovery daemon than from normal use since the recovery 230 daemon often uses controls that can take a lot longer than normal 231 controls. 232 </p></div><div class="refsect2"><a name="idp54876784"></a><h3>RecoveryBanPeriod</h3><p>Default: 300</p><p> 233 The duration in seconds for which a node is banned if the node 234 fails during recovery. After this time has elapsed the node will 235 automatically get unbanned and will attempt to rejoin the cluster. 236 </p><p> 237 A node usually gets banned due to real problems with the node. 238 Don't set this value too small. Otherwise, a problematic node 239 will try to re-join cluster too soon causing unnecessary recoveries. 240 </p></div><div class="refsect2"><a name="idp54879184"></a><h3>RecoveryDropAllIPs</h3><p>Default: 120</p><p> 241 If a node is stuck in recovery, or stopped, or banned, for this 242 many seconds, then ctdb will release all public addresses on 243 that node. 244 </p></div><div class="refsect2"><a name="idp54880880"></a><h3>RecoveryGracePeriod</h3><p>Default: 120</p><p> 245 During recoveries, if a node has not caused recovery failures 246 during the last grace period in seconds, any records of 247 transgressions that the node has caused recovery failures will be 248 forgiven. This resets the ban-counter back to zero for that node. 249 </p></div><div class="refsect2"><a name="idp54882720"></a><h3>RepackLimit</h3><p>Default: 10000</p><p> 250 During vacuuming, if the number of freelist records are more than 251 <code class="varname">RepackLimit</code>, then the database is repacked 252 to get rid of the freelist records to avoid fragmentation. 253 </p><p> 254 Databases are repacked only if both <code class="varname">RepackLimit</code> 255 and <code class="varname">VacuumLimit</code> are exceeded. 
256 </p></div><div class="refsect2"><a name="idp54885920"></a><h3>RerecoveryTimeout</h3><p>Default: 10</p><p> 257 Once a recovery has completed, no additional recoveries are 258 permitted until this timeout in seconds has expired. 259 </p></div><div class="refsect2"><a name="idp54887600"></a><h3>Samba3AvoidDeadlocks</h3><p>Default: 0</p><p> 260 If set to non-zero, enable code that prevents deadlocks with Samba 261 (only for Samba 3.x). 262 </p><p> 263 This should be set to 1 only when using Samba version 3.x 264 to enable special code in ctdb to avoid deadlock with Samba 265 version 3.x. This code is not required for Samba version 4.x 266 and must not be enabled for Samba 4.x. 267 </p></div><div class="refsect2"><a name="idp54889888"></a><h3>SeqnumInterval</h3><p>Default: 1000</p><p> 268 Some databases have seqnum tracking enabled, so that samba will 269 be able to detect asynchronously when there has been updates 270 to the database. Everytime a database is updated its sequence 271 number is increased. 272 </p><p> 273 This tunable is used to specify in milliseconds how frequently 274 ctdb will send out updates to remote nodes to inform them that 275 the sequence number is increased. 276 </p></div><div class="refsect2"><a name="idp54892240"></a><h3>StatHistoryInterval</h3><p>Default: 1</p><p> 277 Granularity of the statistics collected in the statistics 278 history. This is reported by 'ctdb stats' command. 279 </p></div><div class="refsect2"><a name="idp54893904"></a><h3>StickyDuration</h3><p>Default: 600</p><p> 280 Once a record has been marked STICKY, this is the duration in 281 seconds, the record will be flagged as a STICKY record. 282 </p></div><div class="refsect2"><a name="idp54895584"></a><h3>StickyPindown</h3><p>Default: 200</p><p> 283 Once a STICKY record has been migrated onto a node, it will be 284 pinned down on that node for this number of milliseconds. Any 285 request from other nodes to migrate the record off the node will 286 be deferred. 
287 </p></div><div class="refsect2"><a name="idp54897344"></a><h3>TakeoverTimeout</h3><p>Default: 9</p><p> 288 This is the duration in seconds in which ctdb tries to complete IP 289 failover. 290 </p></div><div class="refsect2"><a name="idp54898880"></a><h3>TDBMutexEnabled</h3><p>Default: 0</p><p> 291 This parameter enables TDB_MUTEX_LOCKING feature on volatile 292 databases if the robust mutexes are supported. This optimizes the 293 record locking using robust mutexes and is much more efficient 294 than using posix locks. 295 </p></div><div class="refsect2"><a name="idp54900656"></a><h3>TickleUpdateInterval</h3><p>Default: 20</p><p> 296 Every <code class="varname">TickleUpdateInterval</code> seconds, ctdb 297 synchronizes the client connection information across nodes. 298 </p></div><div class="refsect2"><a name="idp54902576"></a><h3>TraverseTimeout</h3><p>Default: 20</p><p> 299 This is the duration in seconds for which a database traverse 300 is allowed to run. If the traverse does not complete during 301 this interval, ctdb will abort the traverse. 302 </p></div><div class="refsect2"><a name="idp54904304"></a><h3>VacuumFastPathCount</h3><p>Default: 60</p><p> 303 During a vacuuming run, ctdb usually processes only the records 304 marked for deletion also called the fast path vacuuming. After 305 finishing <code class="varname">VacuumFastPathCount</code> number of fast 306 path vacuuming runs, ctdb will trigger a scan of complete database 307 for any empty records that need to be deleted. 308 </p></div><div class="refsect2"><a name="idp54906560"></a><h3>VacuumInterval</h3><p>Default: 10</p><p> 201 309 Periodic interval in seconds when vacuuming is triggered for 202 310 volatile databases. 
203 </p></div><div class="refsect2"><a name="idp55022832"></a><h3>VacuumMaxRunTime</h3><p>Default: 120</p><p> 311 </p></div><div class="refsect2"><a name="idp54908224"></a><h3>VacuumLimit</h3><p>Default: 5000</p><p> 312 During vacuuming, if the number of deleted records are more than 313 <code class="varname">VacuumLimit</code>, then databases are repacked to 314 avoid fragmentation. 315 </p><p> 316 Databases are repacked only if both <code class="varname">RepackLimit</code> 317 and <code class="varname">VacuumLimit</code> are exceeded. 318 </p></div><div class="refsect2"><a name="idp54911392"></a><h3>VacuumMaxRunTime</h3><p>Default: 120</p><p> 204 319 The maximum time in seconds for which the vacuuming process is 205 320 allowed to run. If vacuuming process takes longer than this 206 321 value, then the vacuuming process is terminated. 207 </p></div><div class="refsect2"><a name="idp55024592"></a><h3>RepackLimit</h3><p>Default: 10000</p><p> 208 During vacuuming, if the number of freelist records are more 209 than <code class="varname">RepackLimit</code>, then databases are 210 repacked to get rid of the freelist records to avoid 211 fragmentation. 212 </p><p> 213 Databases are repacked only if both 214 <code class="varname">RepackLimit</code> and 215 <code class="varname">VacuumLimit</code> are exceeded. 216 </p></div><div class="refsect2"><a name="idp55027792"></a><h3>VacuumLimit</h3><p>Default: 5000</p><p> 217 During vacuuming, if the number of deleted records are more 218 than <code class="varname">VacuumLimit</code>, then databases are 219 repacked to avoid fragmentation. 220 </p><p> 221 Databases are repacked only if both 222 <code class="varname">RepackLimit</code> and 223 <code class="varname">VacuumLimit</code> are exceeded. 224 </p></div><div class="refsect2"><a name="idp55030864"></a><h3>VacuumFastPathCount</h3><p>Default: 60</p><p> 225 When a record is deleted, it is marked for deletion during 226 vacuuming. 
Vacuuming process usually processes this list to purge 227 the records from the database. If the number of records marked 228 for deletion are more than VacuumFastPathCount, then vacuuming 229 process will scan the complete database for empty records instead 230 of using the list of records marked for deletion. 231 </p></div><div class="refsect2"><a name="idp55032832"></a><h3>DeferredAttachTO</h3><p>Default: 120</p><p> 232 When databases are frozen we do not allow clients to attach to the 233 databases. Instead of returning an error immediately to the application 234 the attach request from the client is deferred until the database 235 becomes available again at which stage we respond to the client. 236 </p><p> 237 This timeout controls how long we will defer the request from the client 238 before timing it out and returning an error to the client. 239 </p></div><div class="refsect2"><a name="idp55035216"></a><h3>HopcountMakeSticky</h3><p>Default: 50</p><p> 240 If the database is set to 'STICKY' mode, using the 'ctdb setdbsticky' 241 command, any record that is seen as very hot and migrating so fast that 242 hopcount surpasses 50 is set to become a STICKY record for StickyDuration 243 seconds. This means that after each migration the record will be kept on 244 the node and prevented from being migrated off the node. 245 </p><p> 246 This setting allows one to try to identify such records and stop them from 247 migrating across the cluster so fast. This will improve performance for 248 certain workloads, such as locking.tdb if many clients are opening/closing 249 the same file concurrently. 250 </p></div><div class="refsect2"><a name="idp55037776"></a><h3>StickyDuration</h3><p>Default: 600</p><p> 251 Once a record has been found to be fetch-lock hot and has been flagged to 252 become STICKY, this is for how long, in seconds, the record will be 253 flagged as a STICKY record. 
254 </p></div><div class="refsect2"><a name="idp55039504"></a><h3>StickyPindown</h3><p>Default: 200</p><p> 255 Once a STICKY record has been migrated onto a node, it will be pinned down 256 on that node for this number of ms. Any request from other nodes to migrate 257 the record off the node will be deferred until the pindown timer expires. 258 </p></div><div class="refsect2"><a name="idp55041296"></a><h3>StatHistoryInterval</h3><p>Default: 1</p><p> 259 Granularity of the statistics collected in the statistics history. 260 </p></div><div class="refsect2"><a name="idp55042928"></a><h3>AllowClientDBAttach</h3><p>Default: 1</p><p> 261 When set to 0, clients are not allowed to attach to any databases. 262 This can be used to temporarily block any new processes from attaching 263 to and accessing the databases. 264 </p></div><div class="refsect2"><a name="idp55044656"></a><h3>RecoverPDBBySeqNum</h3><p>Default: 1</p><p> 265 When set to zero, database recovery for persistent databases 266 is record-by-record and recovery process simply collects the 267 most recent version of every individual record. 268 </p><p> 269 When set to non-zero, persistent databases will instead be 270 recovered as a whole db and not by individual records. The 271 node that contains the highest value stored in the record 272 "__db_sequence_number__" is selected and the copy of that 273 nodes database is used as the recovered database. 274 </p><p> 275 By default, recovery of persistent databses is done using 276 __db_sequence_number__ record. 277 </p></div><div class="refsect2"><a name="idp55047584"></a><h3>FetchCollapse</h3><p>Default: 1</p><p> 278 When many clients across many nodes try to access the same record at the 279 same time this can lead to a fetch storm where the record becomes very 280 active and bounces between nodes very fast. This leads to high CPU 281 utilization of the ctdbd daemon, trying to bounce that record around 282 very fast, and poor performance. 
283 </p><p> 284 This parameter is used to activate a fetch-collapse. A fetch-collapse 285 is when we track which records we have requests in flight so that we only 286 keep one request in flight from a certain node, even if multiple smbd 287 processes are attemtping to fetch the record at the same time. This 288 can improve performance and reduce CPU utilization for certain 289 workloads. 290 </p><p> 291 This timeout controls if we should collapse multiple fetch operations 292 of the same record into a single request and defer all duplicates or not. 293 </p></div><div class="refsect2"><a name="idp55050784"></a><h3>Samba3AvoidDeadlocks</h3><p>Default: 0</p><p> 294 Enable code that prevents deadlocks with Samba (only for Samba 3.x). 295 </p><p> 296 This should be set to 1 when using Samba version 3.x to enable special 297 code in CTDB to avoid deadlock with Samba version 3.x. This code 298 is not required for Samba version 4.x and must not be enabled for 299 Samba 4.x. 300 </p></div></div><div class="refsect1"><a name="idp55053168"></a><h2>SEE ALSO</h2><p> 322 </p></div><div class="refsect2"><a name="idp54913152"></a><h3>VerboseMemoryNames</h3><p>Default: 0</p><p> 323 When set to non-zero, ctdb assigns verbose names for some of 324 the talloc allocated memory objects. These names are visible 325 in the talloc memory report generated by 'ctdb dumpmemory'. 326 </p></div></div><div class="refsect1"><a name="idp54915024"></a><h2>SEE ALSO</h2><p> 301 327 <span class="citerefentry"><span class="refentrytitle">ctdb</span>(1)</span>, 302 328 
- 
      vendor/current/ctdb/doc/ctdb-tunables.7.xmlr988 r989 30 30 </para> 31 31 32 <refsect2> 33 <title>MaxRedirectCount</title> 34 <para>Default: 3</para> 35 <para> 36 If we are not the DMASTER and need to fetch a record across the network 37 we first send the request to the LMASTER after which the record 38 is passed onto the current DMASTER. If the DMASTER changes before 39 the request has reached that node, the request will be passed onto the 40 "next" DMASTER. For very hot records that migrate rapidly across the 41 cluster this can cause a request to "chase" the record for many hops 42 before it catches up with the record. 43 44 this is how many hops we allow trying to chase the DMASTER before we 45 switch back to the LMASTER again to ask for new directions. 46 </para> 47 <para> 48 When chasing a record, this is how many hops we will chase the record 49 for before going back to the LMASTER to ask for new guidance. 50 </para> 51 </refsect2> 52 53 <refsect2> 54 <title>SeqnumInterval</title> 55 <para>Default: 1000</para> 56 <para> 57 Some databases have seqnum tracking enabled, so that samba will be able 58 to detect asynchronously when there has been updates to the database. 59 Everytime a database is updated its sequence number is increased. 60 </para> 61 <para> 62 This tunable is used to specify in 'ms' how frequently ctdb will 63 send out updates to remote nodes to inform them that the sequence 64 number is increased. 32 <para> 33 The tunable variables are listed alphabetically. 34 </para> 35 36 <refsect2> 37 <title>AllowClientDBAttach</title> 38 <para>Default: 1</para> 39 <para> 40 When set to 0, clients are not allowed to attach to any databases. 41 This can be used to temporarily block any new processes from 42 attaching to and accessing the databases. This is mainly used 43 for detaching a volatile database using 'ctdb detach'. 
44 </para> 45 </refsect2> 46 47 <refsect2> 48 <title>AllowUnhealthyDBRead</title> 49 <para>Default: 0</para> 50 <para> 51 When set to 1, ctdb allows database traverses to read unhealthy 52 databases. By default, ctdb does not allow reading records from 53 unhealthy databases. 65 54 </para> 66 55 </refsect2> … … 70 59 <para>Default: 60</para> 71 60 <para> 72 This is the default 73 setting for timeout for when sending a control message to either the 74 local or a remote ctdb daemon. 75 </para> 76 </refsect2> 77 78 <refsect2> 79 <title>TraverseTimeout</title> 80 <para>Default: 20</para> 81 <para> 82 This setting controls how long we allow a traverse process to run. 83 After this timeout triggers, the main ctdb daemon will abort the 84 traverse if it has not yet finished. 85 </para> 86 </refsect2> 87 88 <refsect2> 89 <title>KeepaliveInterval</title> 61 This is the default setting for timeout for when sending a 62 control message to either the local or a remote ctdb daemon. 63 </para> 64 </refsect2> 65 66 <refsect2> 67 <title>DatabaseHashSize</title> 68 <para>Default: 100001</para> 69 <para> 70 Number of the hash chains for the local store of the tdbs that 71 ctdb manages. 72 </para> 73 </refsect2> 74 75 <refsect2> 76 <title>DatabaseMaxDead</title> 90 77 <para>Default: 5</para> 91 78 <para> 92 How often in seconds should the nodes send keepalives to eachother. 93 </para> 94 </refsect2> 95 96 <refsect2> 97 <title>KeepaliveLimit</title> 98 <para>Default: 5</para> 99 <para> 100 After how many keepalive intervals without any traffic should a node 101 wait until marking the peer as DISCONNECTED. 102 </para> 103 <para> 104 If a node has hung, it can thus take KeepaliveInterval*(KeepaliveLimit+1) 105 seconds before we determine that the node is DISCONNECTED and that we 106 require a recovery. This limitshould not be set too high since we want 107 a hung node to be detectec, and expunged from the cluster well before 108 common CIFS timeouts (45-90 seconds) kick in. 
109 </para> 110 </refsect2> 111 112 <refsect2> 113 <title>RecoverTimeout</title> 114 <para>Default: 20</para> 115 <para> 116 This is the default setting for timeouts for controls when sent from the 117 recovery daemon. We allow longer control timeouts from the recovery daemon 118 than from normal use since the recovery dameon often use controls that 119 can take a lot longer than normal controls. 120 </para> 121 </refsect2> 122 123 <refsect2> 124 <title>RecoverInterval</title> 125 <para>Default: 1</para> 126 <para> 127 How frequently in seconds should the recovery daemon perform the 128 consistency checks that determine if we need to perform a recovery or not. 79 Maximum number of dead records per hash chain for the tdb databses 80 managed by ctdb. 81 </para> 82 </refsect2> 83 84 <refsect2> 85 <title>DBRecordCountWarn</title> 86 <para>Default: 100000</para> 87 <para> 88 When set to non-zero, ctdb will log a warning during recovery if 89 a database has more than this many records. This will produce a 90 warning if a database grows uncontrollably with orphaned records. 91 </para> 92 </refsect2> 93 94 <refsect2> 95 <title>DBRecordSizeWarn</title> 96 <para>Default: 10000000</para> 97 <para> 98 When set to non-zero, ctdb will log a warning during recovery 99 if a single record is bigger than this size. This will produce 100 a warning if a database record grows uncontrollably. 101 </para> 102 </refsect2> 103 104 <refsect2> 105 <title>DBSizeWarn</title> 106 <para>Default: 1000000000</para> 107 <para> 108 When set to non-zero, ctdb will log a warning during recovery if 109 a database size is bigger than this. This will produce a warning 110 if a database grows uncontrollably. 111 </para> 112 </refsect2> 113 114 <refsect2> 115 <title>DeferredAttachTO</title> 116 <para>Default: 120</para> 117 <para> 118 When databases are frozen we do not allow clients to attach to 119 the databases. 
Instead of returning an error immediately to the 120 client, the attach request from the client is deferred until 121 the database becomes available again at which stage we respond 122 to the client. 123 </para> 124 <para> 125 This timeout controls how long we will defer the request from the 126 client before timing it out and returning an error to the client. 127 </para> 128 </refsect2> 129 130 <refsect2> 131 <title>DeterministicIPs</title> 132 <para>Default: 0</para> 133 <para> 134 When set to 1, ctdb will try to keep public IP addresses locked 135 to specific nodes as far as possible. This makes it easier 136 for debugging since you can know that as long as all nodes are 137 healthy public IP X will always be hosted by node Y. 138 </para> 139 <para> 140 The cost of using deterministic IP address assignment is that it 141 disables part of the logic where ctdb tries to reduce the number 142 of public IP assignment changes in the cluster. This tunable may 143 increase the number of IP failover/failbacks that are performed 144 on the cluster by a small margin. 145 </para> 146 </refsect2> 147 148 <refsect2> 149 <title>DisableIPFailover</title> 150 <para>Default: 0</para> 151 <para> 152 When set to non-zero, ctdb will not perform failover or 153 failback. Even if a node fails while holding public IPs, ctdb 154 will not recover the IPs or assign them to another node. 155 </para> 156 <para> 157 When this tunable is enabled, ctdb will no longer attempt 158 to recover the cluster by failing IP addresses over to other 159 nodes. This leads to a service outage until the administrator 160 has manually performed IP failover to replacement nodes using the 161 'ctdb moveip' command. 129 162 </para> 130 163 </refsect2> … … 134 167 <para>Default: 3</para> 135 168 <para> 136 When electing a new recovery master, this is how many seconds we allow 137 the election to take before we either deem the election finished 138 or we fail the election and start a new one. 
139 </para> 140 </refsect2> 141 142 <refsect2> 143 <title>TakeoverTimeout</title> 144 <para>Default: 9</para> 145 <para> 146 This is how many seconds we allow controls to take for IP failover events. 147 </para> 148 </refsect2> 149 150 <refsect2> 151 <title>MonitorInterval</title> 152 <para>Default: 15</para> 153 <para> 154 How often should ctdb run the event scripts to check for a node's health. 155 </para> 156 </refsect2> 157 158 <refsect2> 159 <title>TickleUpdateInterval</title> 160 <para>Default: 20</para> 161 <para> 162 How often will ctdb record and store the "tickle" information used to 163 kickstart stalled tcp connections after a recovery. 169 The number of seconds to wait for the election of recovery 170 master to complete. If the election is not completed during this 171 interval, then that round of election fails and ctdb starts a 172 new election. 173 </para> 174 </refsect2> 175 176 <refsect2> 177 <title>EnableBans</title> 178 <para>Default: 1</para> 179 <para> 180 This parameter allows ctdb to ban a node if the node is misbehaving. 181 </para> 182 <para> 183 When set to 0, this disables banning completely in the cluster 184 and thus nodes can not get banned, even if they break. Don't 185 set to 0 unless you know what you are doing. You should set 186 this to the same value on all nodes to avoid unexpected behaviour. 164 187 </para> 165 188 </refsect2> … … 173 196 run for an event, not just a single event script. 174 197 </para> 175 176 198 <para> 177 199 Note that timeouts are ignored for some events ("takeip", … … 183 205 184 206 <refsect2> 207 <title>FetchCollapse</title> 208 <para>Default: 1</para> 209 <para> 210 This parameter is used to avoid multiple migration requests for 211 the same record from a single node. All the record requests for 212 the same record are queued up and processed when the record is 213 migrated to the current node. 
214 </para> 215 <para> 216 When many clients across many nodes try to access the same record 217 at the same time this can lead to a fetch storm where the record 218 becomes very active and bounces between nodes very fast. This 219 leads to high CPU utilization of the ctdbd daemon, trying to 220 bounce that record around very fast, and poor performance. 221 This can improve performance and reduce CPU utilization for 222 certain workloads. 223 </para> 224 </refsect2> 225 226 <refsect2> 227 <title>HopcountMakeSticky</title> 228 <para>Default: 50</para> 229 <para> 230 For database(s) marked STICKY (using 'ctdb setdbsticky'), 231 any record that is migrating so fast that hopcount 232 exceeds this limit is marked as STICKY record for 233 <varname>StickyDuration</varname> seconds. This means that 234 after each migration the sticky record will be kept on the node 235 <varname>StickyPindown</varname>milliseconds and prevented from 236 being migrated off the node. 237 </para> 238 <para> 239 This will improve performance for certain workloads, such as 240 locking.tdb if many clients are opening/closing the same file 241 concurrently. 242 </para> 243 </refsect2> 244 245 <refsect2> 246 <title>KeepaliveInterval</title> 247 <para>Default: 5</para> 248 <para> 249 How often in seconds should the nodes send keep-alive packets to 250 each other. 251 </para> 252 </refsect2> 253 254 <refsect2> 255 <title>KeepaliveLimit</title> 256 <para>Default: 5</para> 257 <para> 258 After how many keepalive intervals without any traffic should 259 a node wait until marking the peer as DISCONNECTED. 260 </para> 261 <para> 262 If a node has hung, it can take 263 <varname>KeepaliveInterval</varname> * 264 (<varname>KeepaliveLimit</varname> + 1) seconds before 265 ctdb determines that the node is DISCONNECTED and performs 266 a recovery. This limit should not be set too high to enable 267 early detection and avoid any application timeouts (e.g. SMB1) 268 to kick in before the fail over is completed. 
269 </para> 270 </refsect2> 271 272 <refsect2> 273 <title>LCP2PublicIPs</title> 274 <para>Default: 1</para> 275 <para> 276 When set to 1, ctdb uses the LCP2 ip allocation algorithm. 277 </para> 278 </refsect2> 279 280 <refsect2> 281 <title>LockProcessesPerDB</title> 282 <para>Default: 200</para> 283 <para> 284 This is the maximum number of lock helper processes ctdb will 285 create for obtaining record locks. When ctdb cannot get a record 286 lock without blocking, it creates a helper process that waits 287 for the lock to be obtained. 288 </para> 289 </refsect2> 290 291 <refsect2> 292 <title>LogLatencyMs</title> 293 <para>Default: 0</para> 294 <para> 295 When set to non-zero, ctdb will log if certain operations 296 take longer than this value, in milliseconds, to complete. 297 These operations include "process a record request from client", 298 "take a record or database lock", "update a persistent database 299 record" and "vacuum a database". 300 </para> 301 </refsect2> 302 303 <refsect2> 304 <title>MaxQueueDropMsg</title> 305 <para>Default: 1000000</para> 306 <para> 307 This is the maximum number of messages to be queued up for 308 a client before ctdb will treat the client as hung and will 309 terminate the client connection. 310 </para> 311 </refsect2> 312 313 <refsect2> 314 <title>MonitorInterval</title> 315 <para>Default: 15</para> 316 <para> 317 How often should ctdb run the 'monitor' event in seconds to check 318 for a node's health. 319 </para> 320 </refsect2> 321 322 <refsect2> 185 323 <title>MonitorTimeoutCount</title> 186 324 <para>Default: 20</para> 187 325 <para> 188 How many monitor events in a row need to timeout before a node 189 is flagged as UNHEALTHY. This setting is useful if scripts 190 can not be written so that they do not hang for benign 191 reasons. 326 How many 'monitor' events in a row need to timeout before a node 327 is flagged as UNHEALTHY. 
This setting is useful if scripts can 328 not be written so that they do not hang for benign reasons. 329 </para> 330 </refsect2> 331 332 <refsect2> 333 <title>NoIPFailback</title> 334 <para>Default: 0</para> 335 <para> 336 When set to 1, ctdb will not perform failback of IP addresses 337 when a node becomes healthy. When a node becomes UNHEALTHY, 338 ctdb WILL perform failover of public IP addresses, but when the 339 node becomes HEALTHY again, ctdb will not fail the addresses back. 340 </para> 341 <para> 342 Use with caution! Normally when a node becomes available to the 343 cluster ctdb will try to reassign public IP addresses onto the 344 new node as a way to distribute the workload evenly across the 345 clusternode. Ctdb tries to make sure that all running nodes have 346 approximately the same number of public addresses it hosts. 347 </para> 348 <para> 349 When you enable this tunable, ctdb will no longer attempt to 350 rebalance the cluster by failing IP addresses back to the new 351 nodes. An unbalanced cluster will therefore remain unbalanced 352 until there is manual intervention from the administrator. When 353 this parameter is set, you can manually fail public IP addresses 354 over to the new node(s) using the 'ctdb moveip' command. 355 </para> 356 </refsect2> 357 358 <refsect2> 359 <title>NoIPHostOnAllDisabled</title> 360 <para>Default: 0</para> 361 <para> 362 If no nodes are HEALTHY then by default ctdb will happily host 363 public IPs on disabled (unhealthy or administratively disabled) 364 nodes. This can cause problems, for example if the underlying 365 cluster filesystem is not mounted. When set to 1 on a node and 366 that node is disabled, any IPs hosted by this node will be 367 released and the node will not takeover any IPs until it is no 368 longer disabled. 
369 </para> 370 </refsect2> 371 372 <refsect2> 373 <title>NoIPTakeover</title> 374 <para>Default: 0</para> 375 <para> 376 When set to 1, ctdb will not allow IP addresses to be failed 377 over onto this node. Any IP addresses that the node currently 378 hosts will remain on the node but no new IP addresses can be 379 failed over to the node. 380 </para> 381 </refsect2> 382 383 <refsect2> 384 <title>PullDBPreallocation</title> 385 <para>Default: 10*1024*1024</para> 386 <para> 387 This is the size of a record buffer to pre-allocate for sending 388 reply to PULLDB control. Usually record buffer starts with size 389 of the first record and gets reallocated every time a new record 390 is added to the record buffer. For a large number of records, 391 this can be very inefficient to grow the record buffer one record 392 at a time. 393 </para> 394 </refsect2> 395 396 <refsect2> 397 <title>RecBufferSizeLimit</title> 398 <para>Default: 1000000</para> 399 <para> 400 This is the limit on the size of the record buffer to be sent 401 in various controls. This limit is used by new controls used 402 for recovery and controls used in vacuuming. 403 </para> 404 </refsect2> 405 406 <refsect2> 407 <title>RecdFailCount</title> 408 <para>Default: 10</para> 409 <para> 410 If the recovery daemon has failed to ping the main dameon for 411 this many consecutive intervals, the main daemon will consider 412 the recovery daemon as hung and will try to restart it to recover. 413 </para> 414 </refsect2> 415 416 <refsect2> 417 <title>RecdPingTimeout</title> 418 <para>Default: 60</para> 419 <para> 420 If the main dameon has not heard a "ping" from the recovery dameon 421 for this many seconds, the main dameon will log a message that 422 the recovery daemon is potentially hung. This also increments a 423 counter which is checked against <varname>RecdFailCount</varname> 424 for detection of hung recovery daemon. 
425 </para> 426 </refsect2> 427 428 <refsect2> 429 <title>RecLockLatencyMs</title> 430 <para>Default: 1000</para> 431 <para> 432 When using a reclock file for split brain prevention, if set 433 to non-zero this tunable will make the recovery daemon log a 434 message if the fcntl() call to lock/testlock the recovery file 435 takes longer than this number of milliseconds. 436 </para> 437 </refsect2> 438 439 <refsect2> 440 <title>RecoverInterval</title> 441 <para>Default: 1</para> 442 <para> 443 How frequently in seconds should the recovery daemon perform the 444 consistency checks to determine if it should perform a recovery. 445 </para> 446 </refsect2> 447 448 <refsect2> 449 <title>RecoverPDBBySeqNum</title> 450 <para>Default: 1</para> 451 <para> 452 When set to zero, database recovery for persistent databases is 453 record-by-record and recovery process simply collects the most 454 recent version of every individual record. 455 </para> 456 <para> 457 When set to non-zero, persistent databases will instead be 458 recovered as a whole db and not by individual records. The 459 node that contains the highest value stored in the record 460 "__db_sequence_number__" is selected and the copy of that node's 461 database is used as the recovered database. 462 </para> 463 <para> 464 By default, recovery of persistent databases is done using 465 __db_sequence_number__ record. 466 </para> 467 </refsect2> 468 469 <refsect2> 470 <title>RecoverTimeout</title> 471 <para>Default: 120</para> 472 <para> 473 This is the default setting for timeouts for controls when sent 474 from the recovery daemon. We allow longer control timeouts from 475 the recovery daemon than from normal use since the recovery 476 daemon often uses controls that can take a lot longer than normal 477 controls. 
478 </para> 479 </refsect2> 480 481 <refsect2> 482 <title>RecoveryBanPeriod</title> 483 <para>Default: 300</para> 484 <para> 485 The duration in seconds for which a node is banned if the node 486 fails during recovery. After this time has elapsed the node will 487 automatically get unbanned and will attempt to rejoin the cluster. 488 </para> 489 <para> 490 A node usually gets banned due to real problems with the node. 491 Don't set this value too small. Otherwise, a problematic node 492 will try to re-join cluster too soon causing unnecessary recoveries. 493 </para> 494 </refsect2> 495 496 <refsect2> 497 <title>RecoveryDropAllIPs</title> 498 <para>Default: 120</para> 499 <para> 500 If a node is stuck in recovery, or stopped, or banned, for this 501 many seconds, then ctdb will release all public addresses on 502 that node. 192 503 </para> 193 504 </refsect2> … … 197 508 <para>Default: 120</para> 198 509 <para> 199 During recoveries, if a node has not caused recovery failures during the 200 last grace period, any records of transgressions that the node has caused 201 recovery failures will be forgiven. This resets the ban-counter back to 202 zero for that node. 203 </para> 204 </refsect2> 205 206 <refsect2> 207 <title>RecoveryBanPeriod</title> 208 <para>Default: 300</para> 209 <para> 210 If a node becomes banned causing repetitive recovery failures. The node will 211 eventually become banned from the cluster. 212 This controls how long the culprit node will be banned from the cluster 213 before it is allowed to try to join the cluster again. 214 Don't set to small. A node gets banned for a reason and it is usually due 215 to real problems with the node. 216 </para> 217 </refsect2> 218 219 <refsect2> 220 <title>DatabaseHashSize</title> 221 <para>Default: 100001</para> 222 <para> 223 Size of the hash chains for the local store of the tdbs that ctdb manages. 
224 </para> 225 </refsect2> 226 227 <refsect2> 228 <title>DatabaseMaxDead</title> 229 <para>Default: 5</para> 230 <para> 231 How many dead records per hashchain in the TDB database do we allow before 232 the freelist needs to be processed. 510 During recoveries, if a node has not caused recovery failures 511 during the last grace period in seconds, any records of 512 transgressions that the node has caused recovery failures will be 513 forgiven. This resets the ban-counter back to zero for that node. 514 </para> 515 </refsect2> 516 517 <refsect2> 518 <title>RepackLimit</title> 519 <para>Default: 10000</para> 520 <para> 521 During vacuuming, if the number of freelist records are more than 522 <varname>RepackLimit</varname>, then the database is repacked 523 to get rid of the freelist records to avoid fragmentation. 524 </para> 525 <para> 526 Databases are repacked only if both <varname>RepackLimit</varname> 527 and <varname>VacuumLimit</varname> are exceeded. 233 528 </para> 234 529 </refsect2> … … 238 533 <para>Default: 10</para> 239 534 <para> 240 Once a recovery has completed, no additional recoveries are permitted 241 until this timeout has expired. 242 </para> 243 </refsect2> 244 245 <refsect2> 246 <title>EnableBans</title> 535 Once a recovery has completed, no additional recoveries are 536 permitted until this timeout in seconds has expired. 537 </para> 538 </refsect2> 539 540 <refsect2> 541 <title>Samba3AvoidDeadlocks</title> 542 <para>Default: 0</para> 543 <para> 544 If set to non-zero, enable code that prevents deadlocks with Samba 545 (only for Samba 3.x). 546 </para> <para> 547 This should be set to 1 only when using Samba version 3.x 548 to enable special code in ctdb to avoid deadlock with Samba 549 version 3.x. This code is not required for Samba version 4.x 550 and must not be enabled for Samba 4.x. 
551 </para> 552 </refsect2> 553 554 <refsect2> 555 <title>SeqnumInterval</title> 556 <para>Default: 1000</para> 557 <para> 558 Some databases have seqnum tracking enabled, so that samba will 559 be able to detect asynchronously when there has been updates 560 to the database. Everytime a database is updated its sequence 561 number is increased. 562 </para> 563 <para> 564 This tunable is used to specify in milliseconds how frequently 565 ctdb will send out updates to remote nodes to inform them that 566 the sequence number is increased. 567 </para> 568 </refsect2> 569 570 <refsect2> 571 <title>StatHistoryInterval</title> 247 572 <para>Default: 1</para> 248 573 <para> 249 When set to 0, this disables BANNING completely in the cluster and thus 250 nodes can not get banned, even it they break. Don't set to 0 unless you 251 know what you are doing. You should set this to the same value on 252 all nodes to avoid unexpected behaviour. 253 </para> 254 </refsect2> 255 256 <refsect2> 257 <title>DeterministicIPs</title> 258 <para>Default: 0</para> 259 <para> 260 When enabled, this tunable makes ctdb try to keep public IP addresses 261 locked to specific nodes as far as possible. This makes it easier for 262 debugging since you can know that as long as all nodes are healthy 263 public IP X will always be hosted by node Y. 264 </para> 265 <para> 266 The cost of using deterministic IP address assignment is that it 267 disables part of the logic where ctdb tries to reduce the number of 268 public IP assignment changes in the cluster. This tunable may increase 269 the number of IP failover/failbacks that are performed on the cluster 270 by a small margin. 271 </para> 272 273 </refsect2> 274 <refsect2> 275 <title>LCP2PublicIPs</title> 276 <para>Default: 1</para> 277 <para> 278 When enabled this switches ctdb to use the LCP2 ip allocation 279 algorithm. 
280 </para> 281 </refsect2> 282 283 <refsect2> 284 <title>ReclockPingPeriod</title> 285 <para>Default: x</para> 286 <para> 287 Obsolete 288 </para> 289 </refsect2> 290 291 <refsect2> 292 <title>NoIPFailback</title> 293 <para>Default: 0</para> 294 <para> 295 When set to 1, ctdb will not perform failback of IP addresses when a node 296 becomes healthy. Ctdb WILL perform failover of public IP addresses when a 297 node becomes UNHEALTHY, but when the node becomes HEALTHY again, ctdb 298 will not fail the addresses back. 299 </para> 300 <para> 301 Use with caution! Normally when a node becomes available to the cluster 302 ctdb will try to reassign public IP addresses onto the new node as a way 303 to distribute the workload evenly across the clusternode. Ctdb tries to 304 make sure that all running nodes have approximately the same number of 305 public addresses it hosts. 306 </para> 307 <para> 308 When you enable this tunable, CTDB will no longer attempt to rebalance 309 the cluster by failing IP addresses back to the new nodes. An unbalanced 310 cluster will therefore remain unbalanced until there is manual 311 intervention from the administrator. When this parameter is set, you can 312 manually fail public IP addresses over to the new node(s) using the 313 'ctdb moveip' command. 314 </para> 315 </refsect2> 316 317 <refsect2> 318 <title>DisableIPFailover</title> 319 <para>Default: 0</para> 320 <para> 321 When enabled, ctdb will not perform failover or failback. Even if a 322 node fails while holding public IPs, ctdb will not recover the IPs or 323 assign them to another node. 324 </para> 325 <para> 326 When you enable this tunable, CTDB will no longer attempt to recover 327 the cluster by failing IP addresses over to other nodes. This leads to 328 a service outage until the administrator has manually performed failover 329 to replacement nodes using the 'ctdb moveip' command. 
330 </para> 331 </refsect2> 332 333 <refsect2> 334 <title>NoIPTakeover</title> 335 <para>Default: 0</para> 336 <para> 337 When set to 1, ctdb will not allow IP addresses to be failed over 338 onto this node. Any IP addresses that the node currently hosts 339 will remain on the node but no new IP addresses can be failed over 340 to the node. 341 </para> 342 </refsect2> 343 344 <refsect2> 345 <title>NoIPHostOnAllDisabled</title> 346 <para>Default: 0</para> 347 <para> 348 If no nodes are healthy then by default ctdb will happily host 349 public IPs on disabled (unhealthy or administratively disabled) 350 nodes. This can cause problems, for example if the underlying 351 cluster filesystem is not mounted. When set to 1 on a node and 352 that node is disabled it, any IPs hosted by this node will be 353 released and the node will not takeover any IPs until it is no 354 longer disabled. 355 </para> 356 </refsect2> 357 358 <refsect2> 359 <title>DBRecordCountWarn</title> 360 <para>Default: 100000</para> 361 <para> 362 When set to non-zero, ctdb will log a warning when we try to recover a 363 database with more than this many records. This will produce a warning 364 if a database grows uncontrollably with orphaned records. 365 </para> 366 </refsect2> 367 368 <refsect2> 369 <title>DBRecordSizeWarn</title> 370 <para>Default: 10000000</para> 371 <para> 372 When set to non-zero, ctdb will log a warning when we try to recover a 373 database where a single record is bigger than this. This will produce 374 a warning if a database record grows uncontrollably with orphaned 375 sub-records. 376 </para> 377 </refsect2> 378 379 <refsect2> 380 <title>DBSizeWarn</title> 381 <para>Default: 1000000000</para> 382 <para> 383 When set to non-zero, ctdb will log a warning when we try to recover a 384 database bigger than this. This will produce 385 a warning if a database grows uncontrollably. 
386 </para> 387 </refsect2> 388 389 <refsect2> 390 <title>VerboseMemoryNames</title> 391 <para>Default: 0</para> 392 <para> 393 This feature consumes additional memory. When used the talloc library 394 will create more verbose names for all talloc allocated objects. 395 </para> 396 </refsect2> 397 398 <refsect2> 399 <title>RecdPingTimeout</title> 574 Granularity of the statistics collected in the statistics 575 history. This is reported by 'ctdb stats' command. 576 </para> 577 </refsect2> 578 579 <refsect2> 580 <title>StickyDuration</title> 581 <para>Default: 600</para> 582 <para> 583 Once a record has been marked STICKY, this is the duration in 584 seconds, the record will be flagged as a STICKY record. 585 </para> 586 </refsect2> 587 588 <refsect2> 589 <title>StickyPindown</title> 590 <para>Default: 200</para> 591 <para> 592 Once a STICKY record has been migrated onto a node, it will be 593 pinned down on that node for this number of milliseconds. Any 594 request from other nodes to migrate the record off the node will 595 be deferred. 596 </para> 597 </refsect2> 598 599 <refsect2> 600 <title>TakeoverTimeout</title> 601 <para>Default: 9</para> 602 <para> 603 This is the duration in seconds in which ctdb tries to complete IP 604 failover. 605 </para> 606 </refsect2> 607 608 <refsect2> 609 <title>TDBMutexEnabled</title> 610 <para>Default: 0</para> 611 <para> 612 This parameter enables TDB_MUTEX_LOCKING feature on volatile 613 databases if the robust mutexes are supported. This optimizes the 614 record locking using robust mutexes and is much more efficient 615 than using posix locks. 616 </para> 617 </refsect2> 618 619 <refsect2> 620 <title>TickleUpdateInterval</title> 621 <para>Default: 20</para> 622 <para> 623 Every <varname>TickleUpdateInterval</varname> seconds, ctdb 624 synchronizes the client connection information across nodes. 
625 </para> 626 </refsect2> 627 628 <refsect2> 629 <title>TraverseTimeout</title> 630 <para>Default: 20</para> 631 <para> 632 This is the duration in seconds for which a database traverse 633 is allowed to run. If the traverse does not complete during 634 this interval, ctdb will abort the traverse. 635 </para> 636 </refsect2> 637 638 <refsect2> 639 <title>VacuumFastPathCount</title> 400 640 <para>Default: 60</para> 401 641 <para> 402 If the main dameon has not heard a "ping" from the recovery dameon for 403 this many seconds, the main dameon will log a message that the recovery 404 daemon is potentially hung. 405 </para> 406 </refsect2> 407 408 <refsect2> 409 <title>RecdFailCount</title> 410 <para>Default: 10</para> 411 <para> 412 If the recovery daemon has failed to ping the main dameon for this many 413 consecutive intervals, the main daemon will consider the recovery daemon 414 as hung and will try to restart it to recover. 415 </para> 416 </refsect2> 417 418 <refsect2> 419 <title>LogLatencyMs</title> 420 <para>Default: 0</para> 421 <para> 422 When set to non-zero, this will make the main daemon log any operation that 423 took longer than this value, in 'ms', to complete. 424 These include "how long time a lockwait child process needed", 425 "how long time to write to a persistent database" but also 426 "how long did it take to get a response to a CALL from a remote node". 427 </para> 428 </refsect2> 429 430 <refsect2> 431 <title>RecLockLatencyMs</title> 432 <para>Default: 1000</para> 433 <para> 434 When using a reclock file for split brain prevention, if set to non-zero 435 this tunable will make the recovery dameon log a message if the fcntl() 436 call to lock/testlock the recovery file takes longer than this number of 437 ms. 
438 </para> 439 </refsect2> 440 441 <refsect2> 442 <title>RecoveryDropAllIPs</title> 443 <para>Default: 120</para> 444 <para> 445 If we have been stuck in recovery, or stopped, or banned, mode for 446 this many seconds we will force drop all held public addresses. 642 During a vacuuming run, ctdb usually processes only the records 643 marked for deletion also called the fast path vacuuming. After 644 finishing <varname>VacuumFastPathCount</varname> number of fast 645 path vacuuming runs, ctdb will trigger a scan of complete database 646 for any empty records that need to be deleted. 447 647 </para> 448 648 </refsect2> … … 454 654 Periodic interval in seconds when vacuuming is triggered for 455 655 volatile databases. 656 </para> 657 </refsect2> 658 659 <refsect2> 660 <title>VacuumLimit</title> 661 <para>Default: 5000</para> 662 <para> 663 During vacuuming, if the number of deleted records are more than 664 <varname>VacuumLimit</varname>, then databases are repacked to 665 avoid fragmentation. 666 </para> 667 <para> 668 Databases are repacked only if both <varname>RepackLimit</varname> 669 and <varname>VacuumLimit</varname> are exceeded. 456 670 </para> 457 671 </refsect2> … … 468 682 469 683 <refsect2> 470 <title>RepackLimit</title> 471 <para>Default: 10000</para> 472 <para> 473 During vacuuming, if the number of freelist records are more 474 than <varname>RepackLimit</varname>, then databases are 475 repacked to get rid of the freelist records to avoid 476 fragmentation. 477 </para> 478 <para> 479 Databases are repacked only if both 480 <varname>RepackLimit</varname> and 481 <varname>VacuumLimit</varname> are exceeded. 482 </para> 483 </refsect2> 484 485 <refsect2> 486 <title>VacuumLimit</title> 487 <para>Default: 5000</para> 488 <para> 489 During vacuuming, if the number of deleted records are more 490 than <varname>VacuumLimit</varname>, then databases are 491 repacked to avoid fragmentation. 
492 </para> 493 <para> 494 Databases are repacked only if both 495 <varname>RepackLimit</varname> and 496 <varname>VacuumLimit</varname> are exceeded. 497 </para> 498 </refsect2> 499 500 <refsect2> 501 <title>VacuumFastPathCount</title> 502 <para>Default: 60</para> 503 <para> 504 When a record is deleted, it is marked for deletion during 505 vacuuming. Vacuuming process usually processes this list to purge 506 the records from the database. If the number of records marked 507 for deletion are more than VacuumFastPathCount, then vacuuming 508 process will scan the complete database for empty records instead 509 of using the list of records marked for deletion. 510 </para> 511 </refsect2> 512 513 <refsect2> 514 <title>DeferredAttachTO</title> 515 <para>Default: 120</para> 516 <para> 517 When databases are frozen we do not allow clients to attach to the 518 databases. Instead of returning an error immediately to the application 519 the attach request from the client is deferred until the database 520 becomes available again at which stage we respond to the client. 521 </para> 522 <para> 523 This timeout controls how long we will defer the request from the client 524 before timing it out and returning an error to the client. 525 </para> 526 </refsect2> 527 528 <refsect2> 529 <title>HopcountMakeSticky</title> 530 <para>Default: 50</para> 531 <para> 532 If the database is set to 'STICKY' mode, using the 'ctdb setdbsticky' 533 command, any record that is seen as very hot and migrating so fast that 534 hopcount surpasses 50 is set to become a STICKY record for StickyDuration 535 seconds. This means that after each migration the record will be kept on 536 the node and prevented from being migrated off the node. 537 </para> 538 <para> 539 This setting allows one to try to identify such records and stop them from 540 migrating across the cluster so fast. 
This will improve performance for 541 certain workloads, such as locking.tdb if many clients are opening/closing 542 the same file concurrently. 543 </para> 544 </refsect2> 545 546 <refsect2> 547 <title>StickyDuration</title> 548 <para>Default: 600</para> 549 <para> 550 Once a record has been found to be fetch-lock hot and has been flagged to 551 become STICKY, this is for how long, in seconds, the record will be 552 flagged as a STICKY record. 553 </para> 554 </refsect2> 555 556 <refsect2> 557 <title>StickyPindown</title> 558 <para>Default: 200</para> 559 <para> 560 Once a STICKY record has been migrated onto a node, it will be pinned down 561 on that node for this number of ms. Any request from other nodes to migrate 562 the record off the node will be deferred until the pindown timer expires. 563 </para> 564 </refsect2> 565 566 <refsect2> 567 <title>StatHistoryInterval</title> 568 <para>Default: 1</para> 569 <para> 570 Granularity of the statistics collected in the statistics history. 571 </para> 572 </refsect2> 573 574 <refsect2> 575 <title>AllowClientDBAttach</title> 576 <para>Default: 1</para> 577 <para> 578 When set to 0, clients are not allowed to attach to any databases. 579 This can be used to temporarily block any new processes from attaching 580 to and accessing the databases. 581 </para> 582 </refsect2> 583 584 <refsect2> 585 <title>RecoverPDBBySeqNum</title> 586 <para>Default: 1</para> 587 <para> 588 When set to zero, database recovery for persistent databases 589 is record-by-record and recovery process simply collects the 590 most recent version of every individual record. 591 </para> 592 <para> 593 When set to non-zero, persistent databases will instead be 594 recovered as a whole db and not by individual records. The 595 node that contains the highest value stored in the record 596 "__db_sequence_number__" is selected and the copy of that 597 nodes database is used as the recovered database. 
598 </para> 599 <para> 600 By default, recovery of persistent databases is done using the 601 __db_sequence_number__ record. 602 </para> 603 </refsect2> 604 605 <refsect2> 606 <title>FetchCollapse</title> 607 <para>Default: 1</para> 608 <para> 609 When many clients across many nodes try to access the same record at the 610 same time this can lead to a fetch storm where the record becomes very 611 active and bounces between nodes very fast. This leads to high CPU 612 utilization of the ctdbd daemon, trying to bounce that record around 613 very fast, and poor performance. 614 </para> 615 <para> 616 This parameter is used to activate a fetch-collapse. A fetch-collapse 617 is when we track which records we have requests in flight so that we only 618 keep one request in flight from a certain node, even if multiple smbd 619 processes are attempting to fetch the record at the same time. This 620 can improve performance and reduce CPU utilization for certain 621 workloads. 622 </para> 623 <para> 624 This parameter controls whether we should collapse multiple fetch operations 625 of the same record into a single request and defer all duplicates or not. 626 </para> 627 </refsect2> 628 629 <refsect2> 630 <title>Samba3AvoidDeadlocks</title> 631 <para>Default: 0</para> 632 <para> 633 Enable code that prevents deadlocks with Samba (only for Samba 3.x). 634 </para> 635 <para> 636 This should be set to 1 when using Samba version 3.x to enable special 637 code in CTDB to avoid deadlock with Samba version 3.x. This code 638 is not required for Samba version 4.x and must not be enabled for 639 Samba 4.x. 640 </para> 641 </refsect2> 684 <title>VerboseMemoryNames</title> 685 <para>Default: 0</para> 686 <para> 687 When set to non-zero, ctdb assigns verbose names for some of 688 the talloc allocated memory objects. These names are visible 689 in the talloc memory report generated by 'ctdb dumpmemory'. 690 </para> 691 </refsect2> 692 642 693 </refsect1> 643 694 
- 
      vendor/current/ctdb/include/ctdb_private.hr988 r989 438 438 uint32_t freeze_transaction_id; 439 439 uint32_t generation; 440 441 bool push_started; 442 void *push_state; 440 443 }; 441 444 … … 874 877 int32_t ctdb_control_push_db(struct ctdb_context *ctdb, TDB_DATA indata); 875 878 879 int32_t ctdb_control_db_pull(struct ctdb_context *ctdb, 880 struct ctdb_req_control_old *c, 881 TDB_DATA indata, TDB_DATA *outdata); 882 int32_t ctdb_control_db_push_start(struct ctdb_context *ctdb, 883 TDB_DATA indata); 884 int32_t ctdb_control_db_push_confirm(struct ctdb_context *ctdb, 885 TDB_DATA indata, TDB_DATA *outdata); 886 876 887 int ctdb_deferred_drop_all_ips(struct ctdb_context *ctdb); 877 888 
- 
      vendor/current/ctdb/packaging/RPM/ctdb.spec.inr988 r989 208 208 %dir %{_libdir} 209 209 %{_libdir}/ctdb/lib* 210 %{_libdir}/libtevent-unix-util.so.0*211 210 %{_mandir}/man1/ctdb.1.gz 212 211 %{_mandir}/man1/ctdbd.1.gz … … 230 229 %files devel 231 230 %defattr(-,root,root) 232 %{_includedir}/ctdb/util/*.h233 %{_libdir}/libtevent-unix-util.so234 231 235 232 %package tests … … 246 243 %dir %{_datadir}/%{name}-tests 247 244 %{_datadir}/%{name}-tests/* 248 %dir %{_lib dir}/%{name}-tests249 %{_lib dir}/%{name}-tests/*245 %dir %{_libexecdir}/%{name}/tests 246 %{_libexecdir}/%{name}/tests/* 250 247 %{_bindir}/ctdb_run_tests 251 248 %{_bindir}/ctdb_run_cluster_tests 
- 
      vendor/current/ctdb/protocol/protocol.hr988 r989 121 121 /* SRVID prefix used by CTDB */ 122 122 #define CTDB_SRVID_PREFIX 0xF000000000000000LL 123 124 /* SRVID prefix used during recovery for pulling and pushing databases */ 125 #define CTDB_SRVID_RECOVERY 0xF001000000000000LL 126 127 /* SRVID to assign of banning credits */ 128 #define CTDB_SRVID_BANNING 0xF002000000000000LL 123 129 124 130 /* SRVID to inform of election data */ … … 356 362 CTDB_CONTROL_DB_TRANSACTION_COMMIT = 144, 357 363 CTDB_CONTROL_DB_TRANSACTION_CANCEL = 145, 364 CTDB_CONTROL_DB_PULL = 146, 365 CTDB_CONTROL_DB_PUSH_START = 147, 366 CTDB_CONTROL_DB_PUSH_CONFIRM = 148, 358 367 }; 359 368 … … 456 465 #define CTDB_LMASTER_ANY 0xffffffff 457 466 uint32_t lmaster; 467 }; 468 469 struct ctdb_pulldb_ext { 470 uint32_t db_id; 471 uint32_t lmaster; 472 uint64_t srvid; 458 473 }; 459 474 … … 620 635 uint32_t mutex_enabled; 621 636 uint32_t lock_processes_per_db; 637 uint32_t rec_buffer_size_limit; 622 638 }; 623 639 … … 705 721 */ 706 722 #define CTDB_CAP_PARALLEL_RECOVERY 0x00010000 707 708 #define CTDB_CAP_FEATURES (CTDB_CAP_PARALLEL_RECOVERY) 723 #define CTDB_CAP_FRAGMENTED_CONTROLS 0x00020000 724 725 #define CTDB_CAP_FEATURES (CTDB_CAP_PARALLEL_RECOVERY | \ 726 CTDB_CAP_FRAGMENTED_CONTROLS) 709 727 710 728 #define CTDB_CAP_DEFAULT (CTDB_CAP_RECMASTER | \ … … 856 874 uint32_t loglevel; 857 875 struct ctdb_pulldb *pulldb; 876 struct ctdb_pulldb_ext *pulldb_ext; 858 877 struct ctdb_rec_buffer *recbuf; 859 878 uint32_t recmode; … … 924 943 struct ctdb_db_statistics *dbstats; 925 944 enum ctdb_runstate runstate; 945 uint32_t num_records; 926 946 } data; 927 947 }; … … 977 997 /* SRVID_MEM_DUMP, SRVID_TAKEOVER_RUN */ 978 998 struct ctdb_srvid_message *msg; 979 /* SRVID_ REBALANCE_NODE */999 /* SRVID_BANNING, SRVID_REBALANCE_NODE */ 980 1000 uint32_t pnn; 981 1001 /* SRVID_DISABLE_TAKEOVER_RUNS, SRVID_DISABLE_RECOVERIES */ 
- 
      vendor/current/ctdb/protocol/protocol_api.hr988 r989 53 53 void *private_data); 54 54 55 int ctdb_rec_buffer_write(struct ctdb_rec_buffer *recbuf, int fd); 56 int ctdb_rec_buffer_read(int fd, TALLOC_CTX *mem_ctx, 57 struct ctdb_rec_buffer **out); 58 55 59 size_t ctdb_server_id_len(struct ctdb_server_id *sid); 56 60 void ctdb_server_id_push(struct ctdb_server_id *sid, uint8_t *buf); … … 635 639 int ctdb_reply_control_db_transaction_cancel(struct ctdb_reply_control *reply); 636 640 641 void ctdb_req_control_db_pull(struct ctdb_req_control *request, 642 struct ctdb_pulldb_ext *pulldb_ext); 643 int ctdb_reply_control_db_pull(struct ctdb_reply_control *reply, 644 uint32_t *num_records); 645 646 void ctdb_req_control_db_push_start(struct ctdb_req_control *request, 647 struct ctdb_pulldb_ext *pulldb_ext); 648 int ctdb_reply_control_db_push_start(struct ctdb_reply_control *reply); 649 650 void ctdb_req_control_db_push_confirm(struct ctdb_req_control *request, 651 uint32_t db_id); 652 int ctdb_reply_control_db_push_confirm(struct ctdb_reply_control *reply, 653 uint32_t *num_records); 654 637 655 /* From protocol/protocol_message.c */ 638 656 
- 
      vendor/current/ctdb/protocol/protocol_client.cr988 r989 2469 2469 return ctdb_reply_control_generic(reply); 2470 2470 } 2471 2472 /* CTDB_CONTROL_DB_PULL */ 2473 2474 void ctdb_req_control_db_pull(struct ctdb_req_control *request, 2475 struct ctdb_pulldb_ext *pulldb_ext) 2476 { 2477 request->opcode = CTDB_CONTROL_DB_PULL; 2478 request->pad = 0; 2479 request->srvid = 0; 2480 request->client_id = 0; 2481 request->flags = 0; 2482 2483 request->rdata.opcode = CTDB_CONTROL_DB_PULL; 2484 request->rdata.data.pulldb_ext = pulldb_ext; 2485 } 2486 2487 int ctdb_reply_control_db_pull(struct ctdb_reply_control *reply, 2488 uint32_t *num_records) 2489 { 2490 if (reply->status == 0 && 2491 reply->rdata.opcode == CTDB_CONTROL_DB_PULL) { 2492 *num_records = reply->rdata.data.num_records; 2493 } 2494 return reply->status; 2495 } 2496 2497 /* CTDB_CONTROL_DB_PUSH_START */ 2498 2499 void ctdb_req_control_db_push_start(struct ctdb_req_control *request, 2500 struct ctdb_pulldb_ext *pulldb_ext) 2501 { 2502 request->opcode = CTDB_CONTROL_DB_PUSH_START; 2503 request->pad = 0; 2504 request->srvid = 0; 2505 request->client_id = 0; 2506 request->flags = 0; 2507 2508 request->rdata.opcode = CTDB_CONTROL_DB_PUSH_START; 2509 request->rdata.data.pulldb_ext = pulldb_ext; 2510 } 2511 2512 int ctdb_reply_control_db_push_start(struct ctdb_reply_control *reply) 2513 { 2514 return ctdb_reply_control_generic(reply); 2515 } 2516 2517 /* CTDB_CONTROL_DB_PUSH_CONFIRM */ 2518 2519 void ctdb_req_control_db_push_confirm(struct ctdb_req_control *request, 2520 uint32_t db_id) 2521 { 2522 request->opcode = CTDB_CONTROL_DB_PUSH_CONFIRM; 2523 request->pad = 0; 2524 request->srvid = 0; 2525 request->client_id = 0; 2526 request->flags = 0; 2527 2528 request->rdata.opcode = CTDB_CONTROL_DB_PUSH_CONFIRM; 2529 request->rdata.data.db_id = db_id; 2530 } 2531 2532 int ctdb_reply_control_db_push_confirm(struct ctdb_reply_control *reply, 2533 uint32_t *num_records) 2534 { 2535 if (reply->status == 0 && 2536 
reply->rdata.opcode == CTDB_CONTROL_DB_PUSH_CONFIRM) { 2537 *num_records = reply->rdata.data.num_records; 2538 } 2539 return reply->status; 2540 } 
- 
      vendor/current/ctdb/protocol/protocol_control.cr988 r989 50 50 { 51 51 size_t len = 0; 52 uint64_t u64;53 52 54 53 if (cd == NULL) { … … 385 384 386 385 case CTDB_CONTROL_GET_DB_SEQNUM: 387 u64 = cd->data.db_id; 388 len = ctdb_uint64_len(u64); 386 len = ctdb_uint64_len((uint64_t)cd->data.db_id); 389 387 break; 390 388 … … 480 478 481 479 case CTDB_CONTROL_DB_TRANSACTION_CANCEL: 480 len = ctdb_uint32_len(cd->data.db_id); 481 break; 482 483 case CTDB_CONTROL_DB_PULL: 484 len = ctdb_pulldb_ext_len(cd->data.pulldb_ext); 485 break; 486 487 case CTDB_CONTROL_DB_PUSH_START: 488 len = ctdb_pulldb_ext_len(cd->data.pulldb_ext); 489 break; 490 491 case CTDB_CONTROL_DB_PUSH_CONFIRM: 482 492 len = ctdb_uint32_len(cd->data.db_id); 483 493 break; … … 490 500 uint8_t *buf) 491 501 { 492 uint64_t u64;493 494 502 switch (cd->opcode) { 495 503 case CTDB_CONTROL_PROCESS_EXISTS: … … 713 721 714 722 case CTDB_CONTROL_GET_DB_SEQNUM: 715 u64 = cd->data.db_id; 716 ctdb_uint64_push(u64, buf); 723 ctdb_uint32_push(cd->data.db_id, buf); 717 724 break; 718 725 … … 790 797 791 798 case CTDB_CONTROL_DB_TRANSACTION_CANCEL: 799 ctdb_uint32_push(cd->data.db_id, buf); 800 break; 801 802 case CTDB_CONTROL_DB_PULL: 803 ctdb_pulldb_ext_push(cd->data.pulldb_ext, buf); 804 break; 805 806 case CTDB_CONTROL_DB_PUSH_START: 807 ctdb_pulldb_ext_push(cd->data.pulldb_ext, buf); 808 break; 809 810 case CTDB_CONTROL_DB_PUSH_CONFIRM: 792 811 ctdb_uint32_push(cd->data.db_id, buf); 793 812 break; … … 801 820 { 802 821 int ret = 0; 803 uint64_t u64 = 0;804 822 805 823 cd->opcode = opcode; … … 1080 1098 1081 1099 case CTDB_CONTROL_GET_DB_SEQNUM: 1082 ret = ctdb_uint 64_pull(buf, buflen, mem_ctx, &u64);1083 cd->data.db_id = (uint32_t)u64;1100 ret = ctdb_uint32_pull(buf, buflen, mem_ctx, 1101 &cd->data.db_id); 1084 1102 break; 1085 1103 … … 1178 1196 &cd->data.db_id); 1179 1197 break; 1198 1199 case CTDB_CONTROL_DB_PULL: 1200 ret = ctdb_pulldb_ext_pull(buf, buflen, mem_ctx, 1201 &cd->data.pulldb_ext); 1202 break; 
1203 1204 case CTDB_CONTROL_DB_PUSH_START: 1205 ret = ctdb_pulldb_ext_pull(buf, buflen, mem_ctx, 1206 &cd->data.pulldb_ext); 1207 break; 1208 1209 case CTDB_CONTROL_DB_PUSH_CONFIRM: 1210 ret = ctdb_uint32_pull(buf, buflen, mem_ctx, 1211 &cd->data.db_id); 1212 break; 1180 1213 } 1181 1214 … … 1574 1607 1575 1608 case CTDB_CONTROL_DB_TRANSACTION_CANCEL: 1609 break; 1610 1611 case CTDB_CONTROL_DB_PULL: 1612 len = ctdb_uint32_len(cd->data.num_records); 1613 break; 1614 1615 case CTDB_CONTROL_DB_PUSH_START: 1616 break; 1617 1618 case CTDB_CONTROL_DB_PUSH_CONFIRM: 1619 len = ctdb_uint32_len(cd->data.num_records); 1576 1620 break; 1577 1621 } … … 1727 1771 ctdb_node_map_push(cd->data.nodemap, buf); 1728 1772 break; 1773 1774 case CTDB_CONTROL_DB_PULL: 1775 ctdb_uint32_push(cd->data.num_records, buf); 1776 break; 1777 1778 case CTDB_CONTROL_DB_PUSH_CONFIRM: 1779 ctdb_uint32_push(cd->data.num_records, buf); 1780 break; 1729 1781 } 1730 1782 } … … 1913 1965 ret = ctdb_node_map_pull(buf, buflen, mem_ctx, 1914 1966 &cd->data.nodemap); 1967 break; 1968 1969 case CTDB_CONTROL_DB_PULL: 1970 ret = ctdb_uint32_pull(buf, buflen, mem_ctx, 1971 &cd->data.num_records); 1972 break; 1973 1974 case CTDB_CONTROL_DB_PUSH_CONFIRM: 1975 ret = ctdb_uint32_pull(buf, buflen, mem_ctx, 1976 &cd->data.num_records); 1915 1977 break; 1916 1978 } 
- 
      vendor/current/ctdb/protocol/protocol_message.cr988 r989 41 41 42 42 switch (srvid) { 43 case CTDB_SRVID_BANNING: 44 len = ctdb_uint32_len(mdata->pnn); 45 break; 46 43 47 case CTDB_SRVID_ELECTION: 44 48 len = ctdb_election_message_len(mdata->election); … … 115 119 { 116 120 switch (srvid) { 121 case CTDB_SRVID_BANNING: 122 ctdb_uint32_push(mdata->pnn, buf); 123 break; 124 117 125 case CTDB_SRVID_ELECTION: 118 126 ctdb_election_message_push(mdata->election, buf); … … 190 198 191 199 switch (srvid) { 200 case CTDB_SRVID_BANNING: 201 ret = ctdb_uint32_pull(buf, buflen, mem_ctx, &mdata->pnn); 202 break; 203 192 204 case CTDB_SRVID_ELECTION: 193 205 ret = ctdb_election_message_pull(buf, buflen, mem_ctx, 
- 
      vendor/current/ctdb/protocol/protocol_private.hr988 r989 91 91 int ctdb_pulldb_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx, 92 92 struct ctdb_pulldb **out); 93 94 size_t ctdb_pulldb_ext_len(struct ctdb_pulldb_ext *pulldb); 95 void ctdb_pulldb_ext_push(struct ctdb_pulldb_ext *pulldb, uint8_t *buf); 96 int ctdb_pulldb_ext_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx, 97 struct ctdb_pulldb_ext **out); 93 98 94 99 size_t ctdb_traverse_start_len(struct ctdb_traverse_start *traverse); 
- 
      vendor/current/ctdb/protocol/protocol_types.cr988 r989 466 466 467 467 pulldb = talloc_memdup(mem_ctx, buf, sizeof(struct ctdb_pulldb)); 468 if (pulldb == NULL) { 469 return ENOMEM; 470 } 471 472 *out = pulldb; 473 return 0; 474 } 475 476 size_t ctdb_pulldb_ext_len(struct ctdb_pulldb_ext *pulldb) 477 { 478 return sizeof(struct ctdb_pulldb_ext); 479 } 480 481 void ctdb_pulldb_ext_push(struct ctdb_pulldb_ext *pulldb, uint8_t *buf) 482 { 483 memcpy(buf, pulldb, sizeof(struct ctdb_pulldb_ext)); 484 } 485 486 int ctdb_pulldb_ext_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx, 487 struct ctdb_pulldb_ext **out) 488 { 489 struct ctdb_pulldb_ext *pulldb; 490 491 if (buflen < sizeof(struct ctdb_pulldb_ext)) { 492 return EMSGSIZE; 493 } 494 495 pulldb = talloc_memdup(mem_ctx, buf, sizeof(struct ctdb_pulldb_ext)); 468 496 if (pulldb == NULL) { 469 497 return ENOMEM; … … 771 799 772 800 return ret; 801 } 802 803 int ctdb_rec_buffer_write(struct ctdb_rec_buffer *recbuf, int fd) 804 { 805 ssize_t n; 806 807 n = write(fd, &recbuf->db_id, sizeof(uint32_t)); 808 if (n == -1 || n != sizeof(uint32_t)) { 809 return (errno != 0 ? errno : EIO); 810 } 811 n = write(fd, &recbuf->count, sizeof(uint32_t)); 812 if (n == -1 || n != sizeof(uint32_t)) { 813 return (errno != 0 ? errno : EIO); 814 } 815 n = write(fd, &recbuf->buflen, sizeof(size_t)); 816 if (n == -1 || n != sizeof(size_t)) { 817 return (errno != 0 ? errno : EIO); 818 } 819 n = write(fd, recbuf->buf, recbuf->buflen); 820 if (n == -1 || n != recbuf->buflen) { 821 return (errno != 0 ? errno : EIO); 822 } 823 824 return 0; 825 } 826 827 int ctdb_rec_buffer_read(int fd, TALLOC_CTX *mem_ctx, 828 struct ctdb_rec_buffer **out) 829 { 830 struct ctdb_rec_buffer *recbuf; 831 ssize_t n; 832 833 recbuf = talloc(mem_ctx, struct ctdb_rec_buffer); 834 if (recbuf == NULL) { 835 return ENOMEM; 836 } 837 838 n = read(fd, &recbuf->db_id, sizeof(uint32_t)); 839 if (n == -1 || n != sizeof(uint32_t)) { 840 return (errno != 0 ? 
errno : EIO); 841 } 842 n = read(fd, &recbuf->count, sizeof(uint32_t)); 843 if (n == -1 || n != sizeof(uint32_t)) { 844 return (errno != 0 ? errno : EIO); 845 } 846 n = read(fd, &recbuf->buflen, sizeof(size_t)); 847 if (n == -1 || n != sizeof(size_t)) { 848 return (errno != 0 ? errno : EIO); 849 } 850 851 recbuf->buf = talloc_size(recbuf, recbuf->buflen); 852 if (recbuf->buf == NULL) { 853 return ENOMEM; 854 } 855 856 n = read(fd, recbuf->buf, recbuf->buflen); 857 if (n == -1 || n != recbuf->buflen) { 858 return (errno != 0 ? errno : EIO); 859 } 860 861 *out = recbuf; 862 return 0; 773 863 } 774 864 
- 
      vendor/current/ctdb/server/ctdb_control.cr988 r989 716 716 return ctdb_control_db_transaction_cancel(ctdb, indata); 717 717 718 case CTDB_CONTROL_DB_PULL: 719 CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_pulldb_ext)); 720 return ctdb_control_db_pull(ctdb, c, indata, outdata); 721 722 case CTDB_CONTROL_DB_PUSH_START: 723 CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_pulldb_ext)); 724 return ctdb_control_db_push_start(ctdb, indata); 725 726 case CTDB_CONTROL_DB_PUSH_CONFIRM: 727 CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t)); 728 return ctdb_control_db_push_confirm(ctdb, indata, outdata); 729 718 730 default: 719 731 DEBUG(DEBUG_CRIT,(__location__ " Unknown CTDB control opcode %u\n", opcode)); 
- 
      vendor/current/ctdb/server/ctdb_daemon.cr988 r989 44 44 #include "common/common.h" 45 45 #include "common/logging.h" 46 #include "common/pidfile.h" 46 47 47 48 struct ctdb_client_pid_list { … … 53 54 54 55 const char *ctdbd_pidfile = NULL; 56 static struct pidfile_context *ctdbd_pidfile_ctx = NULL; 55 57 56 58 static void daemon_incoming_packet(void *, struct ctdb_req_header *); … … 984 986 { 985 987 struct sockaddr_un addr; 988 int ret; 986 989 987 990 ctdb->daemon.sd = socket(AF_UNIX, SOCK_STREAM, 0); … … 994 997 strncpy(addr.sun_path, ctdb->daemon.name, sizeof(addr.sun_path)-1); 995 998 996 /* First check if an old ctdbd might be running */997 if (connect(ctdb->daemon.sd,998 (struct sockaddr *)&addr, sizeof(addr)) == 0) {999 DEBUG(DEBUG_CRIT,1000 ("Something is already listening on ctdb socket '%s'\n",1001 ctdb->daemon.name));1002 goto failed;1003 }1004 1005 999 /* Remove any old socket */ 1006 unlink(ctdb->daemon.name); 1000 ret = unlink(ctdb->daemon.name); 1001 if (ret == 0) { 1002 DEBUG(DEBUG_WARNING, 1003 ("Removed stale socket %s\n", ctdb->daemon.name)); 1004 } else if (errno != ENOENT) { 1005 DEBUG(DEBUG_ERR, 1006 ("Failed to remove stale socket %s\n", ctdb->daemon.name)); 1007 return -1; 1008 } 1007 1009 1008 1010 set_close_on_exec(ctdb->daemon.sd); … … 1128 1130 static void ctdb_remove_pidfile(void) 1129 1131 { 1130 /* Only the main ctdbd's PID matches the SID */ 1131 if (ctdbd_pidfile != NULL && getsid(0) == getpid()) { 1132 if (unlink(ctdbd_pidfile) == 0) { 1133 DEBUG(DEBUG_NOTICE, ("Removed PID file %s\n", 1134 ctdbd_pidfile)); 1135 } else { 1136 DEBUG(DEBUG_WARNING, ("Failed to Remove PID file %s\n", 1137 ctdbd_pidfile)); 1138 } 1139 } 1140 } 1141 1142 static void ctdb_create_pidfile(pid_t pid) 1132 TALLOC_FREE(ctdbd_pidfile_ctx); 1133 } 1134 1135 static void ctdb_create_pidfile(TALLOC_CTX *mem_ctx) 1143 1136 { 1144 1137 if (ctdbd_pidfile != NULL) { 1145 FILE *fp;1146 1147 fp = fopen(ctdbd_pidfile, "w");1148 if (fp == NULL) {1149 
DEBUG(DEBUG_ALERT,1150 ("Failed to open PID file %s\n",ctdbd_pidfile));1138 int ret = pidfile_create(mem_ctx, ctdbd_pidfile, 1139 &ctdbd_pidfile_ctx); 1140 if (ret != 0) { 1141 DEBUG(DEBUG_ERR, 1142 ("Failed to create PID file %s\n", 1143 ctdbd_pidfile)); 1151 1144 exit(11); 1152 1145 } 1153 1146 1154 fprintf(fp, "%d\n", pid);1155 fclose(fp);1156 1147 DEBUG(DEBUG_NOTICE, ("Created PID file %s\n", ctdbd_pidfile)); 1157 1148 atexit(ctdb_remove_pidfile); … … 1214 1205 int res, ret = -1; 1215 1206 struct tevent_fd *fde; 1207 1208 if (do_fork && fork()) { 1209 return 0; 1210 } 1211 1212 if (do_fork) { 1213 if (setsid() == -1) { 1214 ctdb_die(ctdb, "Failed to setsid()\n"); 1215 } 1216 close(0); 1217 if (open("/dev/null", O_RDONLY) != 0) { 1218 DEBUG(DEBUG_ALERT,(__location__ " Failed to setup stdin on /dev/null\n")); 1219 exit(11); 1220 } 1221 } 1222 ignore_signal(SIGPIPE); 1223 ignore_signal(SIGUSR1); 1224 1225 ctdb->ctdbd_pid = getpid(); 1226 DEBUG(DEBUG_ERR, ("Starting CTDBD (Version %s) as PID: %u\n", 1227 CTDB_VERSION_STRING, ctdb->ctdbd_pid)); 1228 ctdb_create_pidfile(ctdb); 1216 1229 1217 1230 /* create a unix domain stream socket to listen to */ … … 1222 1235 } 1223 1236 1224 if (do_fork && fork()) {1225 return 0;1226 }1227 1228 tdb_reopen_all(false);1229 1230 if (do_fork) {1231 if (setsid() == -1) {1232 ctdb_die(ctdb, "Failed to setsid()\n");1233 }1234 close(0);1235 if (open("/dev/null", O_RDONLY) != 0) {1236 DEBUG(DEBUG_ALERT,(__location__ " Failed to setup stdin on /dev/null\n"));1237 exit(11);1238 }1239 }1240 ignore_signal(SIGPIPE);1241 ignore_signal(SIGUSR1);1242 1243 ctdb->ctdbd_pid = getpid();1244 DEBUG(DEBUG_ERR, ("Starting CTDBD (Version %s) as PID: %u\n",1245 CTDB_VERSION_STRING, ctdb->ctdbd_pid));1246 ctdb_create_pidfile(ctdb->ctdbd_pid);1247 1248 1237 /* Make sure we log something when the daemon terminates. 
1249 1238 * This must be the first exit handler to run (so the last to … … 1261 1250 1262 1251 ctdb->ev = tevent_context_init(NULL); 1252 if (ctdb->ev == NULL) { 1253 DEBUG(DEBUG_ALERT,("tevent_context_init() failed\n")); 1254 exit(1); 1255 } 1263 1256 tevent_loop_allow_nesting(ctdb->ev); 1264 1257 tevent_set_trace_callback(ctdb->ev, ctdb_tevent_trace, ctdb); … … 1846 1839 /* get a new event context */ 1847 1840 ctdb->ev = tevent_context_init(ctdb); 1841 if (ctdb->ev == NULL) { 1842 DEBUG(DEBUG_ALERT,("tevent_context_init() failed\n")); 1843 exit(1); 1844 } 1848 1845 tevent_loop_allow_nesting(ctdb->ev); 1849 1846 
- 
      vendor/current/ctdb/server/ctdb_logging.cr988 r989 282 282 /* We'll fail if stderr/stdout not already open; it's simpler. */ 283 283 old_stdout = dup(STDOUT_FILENO); 284 if (old_stdout < 0) { 285 DEBUG(DEBUG_ERR, ("Failed to dup stdout for child logging\n")); 286 return -1; 287 } 284 288 old_stderr = dup(STDERR_FILENO); 285 if (old_stdout < 0 || old_stderr < 0) { 286 DEBUG(DEBUG_ERR, ("Failed to dup stdout/stderr for child logging\n")); 289 if (old_stderr < 0) { 290 DEBUG(DEBUG_ERR, ("Failed to dup stderr for child logging\n")); 291 close(old_stdout); 287 292 return -1; 288 293 } 
- 
      vendor/current/ctdb/server/ctdb_recover.cr988 r989 314 314 } 315 315 316 struct db_pull_state { 317 struct ctdb_context *ctdb; 318 struct ctdb_db_context *ctdb_db; 319 struct ctdb_marshall_buffer *recs; 320 uint32_t pnn; 321 uint64_t srvid; 322 uint32_t num_records; 323 }; 324 325 static int traverse_db_pull(struct tdb_context *tdb, TDB_DATA key, 326 TDB_DATA data, void *private_data) 327 { 328 struct db_pull_state *state = (struct db_pull_state *)private_data; 329 struct ctdb_marshall_buffer *recs; 330 331 recs = ctdb_marshall_add(state->ctdb, state->recs, 332 state->ctdb_db->db_id, 0, key, NULL, data); 333 if (recs == NULL) { 334 TALLOC_FREE(state->recs); 335 return -1; 336 } 337 state->recs = recs; 338 339 if (talloc_get_size(state->recs) >= 340 state->ctdb->tunable.rec_buffer_size_limit) { 341 TDB_DATA buffer; 342 int ret; 343 344 buffer = ctdb_marshall_finish(state->recs); 345 ret = ctdb_daemon_send_message(state->ctdb, state->pnn, 346 state->srvid, buffer); 347 if (ret != 0) { 348 TALLOC_FREE(state->recs); 349 return -1; 350 } 351 352 state->num_records += state->recs->count; 353 TALLOC_FREE(state->recs); 354 } 355 356 return 0; 357 } 358 359 int32_t ctdb_control_db_pull(struct ctdb_context *ctdb, 360 struct ctdb_req_control_old *c, 361 TDB_DATA indata, TDB_DATA *outdata) 362 { 363 struct ctdb_pulldb_ext *pulldb_ext; 364 struct ctdb_db_context *ctdb_db; 365 struct db_pull_state state; 366 int ret; 367 368 pulldb_ext = (struct ctdb_pulldb_ext *)indata.dptr; 369 370 ctdb_db = find_ctdb_db(ctdb, pulldb_ext->db_id); 371 if (ctdb_db == NULL) { 372 DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%08x\n", 373 pulldb_ext->db_id)); 374 return -1; 375 } 376 377 if (!ctdb_db_frozen(ctdb_db)) { 378 DEBUG(DEBUG_ERR, 379 ("rejecting ctdb_control_pull_db when not frozen\n")); 380 return -1; 381 } 382 383 if (ctdb_db->unhealthy_reason) { 384 /* this is just a warning, as the tdb should be empty anyway */ 385 DEBUG(DEBUG_WARNING, 386 ("db(%s) unhealty in 
ctdb_control_db_pull: %s\n", 387 ctdb_db->db_name, ctdb_db->unhealthy_reason)); 388 } 389 390 state.ctdb = ctdb; 391 state.ctdb_db = ctdb_db; 392 state.recs = NULL; 393 state.pnn = c->hdr.srcnode; 394 state.srvid = pulldb_ext->srvid; 395 state.num_records = 0; 396 397 if (ctdb_lockdb_mark(ctdb_db) != 0) { 398 DEBUG(DEBUG_ERR, 399 (__location__ " Failed to get lock on entire db - failing\n")); 400 return -1; 401 } 402 403 ret = tdb_traverse_read(ctdb_db->ltdb->tdb, traverse_db_pull, &state); 404 if (ret == -1) { 405 DEBUG(DEBUG_ERR, 406 (__location__ " Failed to get traverse db '%s'\n", 407 ctdb_db->db_name)); 408 ctdb_lockdb_unmark(ctdb_db); 409 return -1; 410 } 411 412 /* Last few records */ 413 if (state.recs != NULL) { 414 TDB_DATA buffer; 415 416 buffer = ctdb_marshall_finish(state.recs); 417 ret = ctdb_daemon_send_message(state.ctdb, state.pnn, 418 state.srvid, buffer); 419 if (ret != 0) { 420 TALLOC_FREE(state.recs); 421 ctdb_lockdb_unmark(ctdb_db); 422 return -1; 423 } 424 425 state.num_records += state.recs->count; 426 TALLOC_FREE(state.recs); 427 } 428 429 ctdb_lockdb_unmark(ctdb_db); 430 431 outdata->dptr = talloc_size(outdata, sizeof(uint32_t)); 432 if (outdata->dptr == NULL) { 433 DEBUG(DEBUG_ERR, (__location__ " Memory allocation error\n")); 434 return -1; 435 } 436 437 memcpy(outdata->dptr, (uint8_t *)&state.num_records, sizeof(uint32_t)); 438 outdata->dsize = sizeof(uint32_t); 439 440 return 0; 441 } 442 316 443 /* 317 444 push a bunch of records into a ltdb, filtering by rsn … … 406 533 ctdb_lockdb_unmark(ctdb_db); 407 534 return -1; 535 } 536 537 struct db_push_state { 538 struct ctdb_context *ctdb; 539 struct ctdb_db_context *ctdb_db; 540 uint64_t srvid; 541 uint32_t num_records; 542 bool failed; 543 }; 544 545 static void db_push_msg_handler(uint64_t srvid, TDB_DATA indata, 546 void *private_data) 547 { 548 struct db_push_state *state = talloc_get_type( 549 private_data, struct db_push_state); 550 struct ctdb_marshall_buffer *recs; 551 struct 
ctdb_rec_data_old *rec; 552 int i, ret; 553 554 if (state->failed) { 555 return; 556 } 557 558 recs = (struct ctdb_marshall_buffer *)indata.dptr; 559 rec = (struct ctdb_rec_data_old *)&recs->data[0]; 560 561 DEBUG(DEBUG_INFO, ("starting push of %u records for dbid 0x%x\n", 562 recs->count, recs->db_id)); 563 564 for (i=0; i<recs->count; i++) { 565 TDB_DATA key, data; 566 struct ctdb_ltdb_header *hdr; 567 568 key.dptr = &rec->data[0]; 569 key.dsize = rec->keylen; 570 data.dptr = &rec->data[key.dsize]; 571 data.dsize = rec->datalen; 572 573 if (data.dsize < sizeof(struct ctdb_ltdb_header)) { 574 DEBUG(DEBUG_CRIT,(__location__ " bad ltdb record\n")); 575 goto failed; 576 } 577 578 hdr = (struct ctdb_ltdb_header *)data.dptr; 579 /* Strip off any read only record flags. 580 * All readonly records are revoked implicitely by a recovery. 581 */ 582 hdr->flags &= ~CTDB_REC_RO_FLAGS; 583 584 data.dptr += sizeof(*hdr); 585 data.dsize -= sizeof(*hdr); 586 587 ret = ctdb_ltdb_store(state->ctdb_db, key, hdr, data); 588 if (ret != 0) { 589 DEBUG(DEBUG_ERR, 590 (__location__ " Unable to store record\n")); 591 goto failed; 592 } 593 594 rec = (struct ctdb_rec_data_old *)(rec->length + (uint8_t *)rec); 595 } 596 597 DEBUG(DEBUG_DEBUG, ("finished push of %u records for dbid 0x%x\n", 598 recs->count, recs->db_id)); 599 600 state->num_records += recs->count; 601 return; 602 603 failed: 604 state->failed = true; 605 } 606 607 int32_t ctdb_control_db_push_start(struct ctdb_context *ctdb, TDB_DATA indata) 608 { 609 struct ctdb_pulldb_ext *pulldb_ext; 610 struct ctdb_db_context *ctdb_db; 611 struct db_push_state *state; 612 int ret; 613 614 pulldb_ext = (struct ctdb_pulldb_ext *)indata.dptr; 615 616 ctdb_db = find_ctdb_db(ctdb, pulldb_ext->db_id); 617 if (ctdb_db == NULL) { 618 DEBUG(DEBUG_ERR, 619 (__location__ " Unknown db 0x%08x\n", pulldb_ext->db_id)); 620 return -1; 621 } 622 623 if (!ctdb_db_frozen(ctdb_db)) { 624 DEBUG(DEBUG_ERR, 625 ("rejecting ctdb_control_db_push_start when not 
frozen\n")); 626 return -1; 627 } 628 629 if (ctdb_db->push_started) { 630 DEBUG(DEBUG_WARNING, 631 (__location__ " DB push already started for %s\n", 632 ctdb_db->db_name)); 633 634 /* De-register old state */ 635 state = (struct db_push_state *)ctdb_db->push_state; 636 if (state != NULL) { 637 srvid_deregister(ctdb->srv, state->srvid, state); 638 talloc_free(state); 639 ctdb_db->push_state = NULL; 640 } 641 } 642 643 state = talloc_zero(ctdb_db, struct db_push_state); 644 if (state == NULL) { 645 DEBUG(DEBUG_ERR, (__location__ " Memory allocation error\n")); 646 return -1; 647 } 648 649 state->ctdb = ctdb; 650 state->ctdb_db = ctdb_db; 651 state->srvid = pulldb_ext->srvid; 652 state->failed = false; 653 654 ret = srvid_register(ctdb->srv, state, state->srvid, 655 db_push_msg_handler, state); 656 if (ret != 0) { 657 DEBUG(DEBUG_ERR, 658 (__location__ " Failed to register srvid for db push\n")); 659 talloc_free(state); 660 return -1; 661 } 662 663 if (ctdb_lockdb_mark(ctdb_db) != 0) { 664 DEBUG(DEBUG_ERR, 665 (__location__ " Failed to get lock on entire db - failing\n")); 666 srvid_deregister(ctdb->srv, state->srvid, state); 667 talloc_free(state); 668 return -1; 669 } 670 671 ctdb_db->push_started = true; 672 ctdb_db->push_state = state; 673 674 return 0; 675 } 676 677 int32_t ctdb_control_db_push_confirm(struct ctdb_context *ctdb, 678 TDB_DATA indata, TDB_DATA *outdata) 679 { 680 uint32_t db_id; 681 struct ctdb_db_context *ctdb_db; 682 struct db_push_state *state; 683 684 db_id = *(uint32_t *)indata.dptr; 685 686 ctdb_db = find_ctdb_db(ctdb, db_id); 687 if (ctdb_db == NULL) { 688 DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%08x\n", db_id)); 689 return -1; 690 } 691 692 if (!ctdb_db_frozen(ctdb_db)) { 693 DEBUG(DEBUG_ERR, 694 ("rejecting ctdb_control_db_push_confirm when not frozen\n")); 695 return -1; 696 } 697 698 if (!ctdb_db->push_started) { 699 DEBUG(DEBUG_ERR, (__location__ " DB push not started\n")); 700 return -1; 701 } 702 703 if (ctdb_db->readonly) { 
704 DEBUG(DEBUG_ERR, 705 ("Clearing the tracking database for dbid 0x%x\n", 706 ctdb_db->db_id)); 707 if (tdb_wipe_all(ctdb_db->rottdb) != 0) { 708 DEBUG(DEBUG_ERR, 709 ("Failed to wipe tracking database for 0x%x." 710 " Dropping read-only delegation support\n", 711 ctdb_db->db_id)); 712 ctdb_db->readonly = false; 713 tdb_close(ctdb_db->rottdb); 714 ctdb_db->rottdb = NULL; 715 ctdb_db->readonly = false; 716 } 717 718 while (ctdb_db->revokechild_active != NULL) { 719 talloc_free(ctdb_db->revokechild_active); 720 } 721 } 722 723 ctdb_lockdb_unmark(ctdb_db); 724 725 state = (struct db_push_state *)ctdb_db->push_state; 726 if (state == NULL) { 727 DEBUG(DEBUG_ERR, (__location__ " Missing push db state\n")); 728 return -1; 729 } 730 731 srvid_deregister(ctdb->srv, state->srvid, state); 732 733 outdata->dptr = talloc_size(outdata, sizeof(uint32_t)); 734 if (outdata->dptr == NULL) { 735 DEBUG(DEBUG_ERR, (__location__ " Memory allocation error\n")); 736 talloc_free(state); 737 ctdb_db->push_state = NULL; 738 return -1; 739 } 740 741 memcpy(outdata->dptr, (uint8_t *)&state->num_records, sizeof(uint32_t)); 742 outdata->dsize = sizeof(uint32_t); 743 744 talloc_free(state); 745 ctdb_db->push_started = false; 746 ctdb_db->push_state = NULL; 747 748 return 0; 408 749 } 409 750 … … 1033 1374 if (data.dsize < sizeof(struct ctdb_ltdb_header)) { 1034 1375 DEBUG(DEBUG_CRIT,(__location__ " bad ltdb record in indata\n")); 1376 talloc_free(records); 1035 1377 return -1; 1036 1378 } … … 1225 1567 DEBUG(DEBUG_CRIT, (__location__ " bad ltdb record " 1226 1568 "in indata\n")); 1569 talloc_free(records); 1227 1570 return -1; 1228 1571 } 
- 
      vendor/current/ctdb/server/ctdb_recoverd.cr988 r989 253 253 uint32_t *force_rebalance_nodes; 254 254 struct ctdb_node_capabilities *caps; 255 bool frozen_on_inactive; 255 256 }; 256 257 … … 1782 1783 } 1783 1784 1785 setenv("CTDB_DBDIR_STATE", rec->ctdb->db_directory_state, 1); 1786 1784 1787 if (!ctdb_vfork_with_logging(state, rec->ctdb, "recovery", prog, nargs, 1785 1788 args, NULL, NULL, &state->pid)) { … … 1981 1984 1982 1985 DEBUG(DEBUG_NOTICE, (__location__ " Recovery - disabled recovery mode\n")); 1986 1987 /* execute the "recovered" event script on all nodes */ 1988 ret = run_recovered_eventscript(rec, nodemap, "do_recovery"); 1989 if (ret!=0) { 1990 DEBUG(DEBUG_ERR, (__location__ " Unable to run the 'recovered' event on cluster. Recovery process failed.\n")); 1991 return -1; 1992 } 1993 1994 DEBUG(DEBUG_NOTICE, (__location__ " Recovery - finished the recovered event\n")); 1983 1995 1984 1996 return 0; … … 2157 2169 do_takeover_run(rec, nodemap, false); 2158 2170 2159 /* execute the "recovered" event script on all nodes */2160 ret = run_recovered_eventscript(rec, nodemap, "do_recovery");2161 if (ret!=0) {2162 DEBUG(DEBUG_ERR, (__location__ " Unable to run the 'recovered' event on cluster. 
Recovery process failed.\n"));2163 goto fail;2164 }2165 2166 DEBUG(DEBUG_NOTICE, (__location__ " Recovery - finished the recovered event\n"));2167 2168 2171 /* send a message to all clients telling them that the cluster 2169 2172 has been reconfigured */ … … 2659 2662 } 2660 2663 2664 /* 2665 * handler for assigning banning credits 2666 */ 2667 static void banning_handler(uint64_t srvid, TDB_DATA data, void *private_data) 2668 { 2669 struct ctdb_recoverd *rec = talloc_get_type( 2670 private_data, struct ctdb_recoverd); 2671 uint32_t ban_pnn; 2672 2673 /* Ignore if we are not recmaster */ 2674 if (rec->ctdb->pnn != rec->recmaster) { 2675 return; 2676 } 2677 2678 if (data.dsize != sizeof(uint32_t)) { 2679 DEBUG(DEBUG_ERR, (__location__ "invalid data size %zu\n", 2680 data.dsize)); 2681 return; 2682 } 2683 2684 ban_pnn = *(uint32_t *)data.dptr; 2685 2686 ctdb_set_culprit_count(rec, ban_pnn, rec->nodemap->num); 2687 } 2661 2688 2662 2689 /* … … 3490 3517 return; 3491 3518 } 3492 ret = ctdb_ctrl_freeze(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE); 3519 } 3520 if (! 
rec->frozen_on_inactive) { 3521 ret = ctdb_ctrl_freeze(ctdb, CONTROL_TIMEOUT(), 3522 CTDB_CURRENT_NODE); 3493 3523 if (ret != 0) { 3494 DEBUG(DEBUG_ERR,(__location__ " Failed to freeze node in STOPPED or BANNED state\n")); 3524 DEBUG(DEBUG_ERR, 3525 (__location__ " Failed to freeze node " 3526 "in STOPPED or BANNED state\n")); 3495 3527 return; 3496 3528 } 3529 3530 rec->frozen_on_inactive = true; 3497 3531 } 3498 3532 … … 3503 3537 return; 3504 3538 } 3539 3540 rec->frozen_on_inactive = false; 3505 3541 3506 3542 /* If we are not the recmaster then do some housekeeping */ … … 3883 3919 3884 3920 rec->priority_time = timeval_current(); 3921 rec->frozen_on_inactive = false; 3885 3922 3886 3923 /* register a message port for sending memory dumps */ 3887 3924 ctdb_client_set_message_handler(ctdb, CTDB_SRVID_MEM_DUMP, mem_dump_handler, rec); 3925 3926 /* when a node is assigned banning credits */ 3927 ctdb_client_set_message_handler(ctdb, CTDB_SRVID_BANNING, 3928 banning_handler, rec); 3888 3929 3889 3930 /* register a message port for recovery elections */ 
- 
      vendor/current/ctdb/server/ctdb_recovery_helper.cr988 r989 35 35 #include "client/client.h" 36 36 37 #define TIMEOUT() timeval_current_ofs(10, 0) 37 static int recover_timeout = 30; 38 39 #define NUM_RETRIES 3 40 41 #define TIMEOUT() timeval_current_ofs(recover_timeout, 0) 38 42 39 43 static void LOG(const char *fmt, ...) … … 64 68 } 65 69 70 static bool generic_recv(struct tevent_req *req, int *perr) 71 { 72 int err; 73 74 if (tevent_req_is_unix_error(req, &err)) { 75 if (perr != NULL) { 76 *perr = err; 77 } 78 return false; 79 } 80 81 return true; 82 } 83 84 static uint64_t rec_srvid = CTDB_SRVID_RECOVERY; 85 86 static uint64_t srvid_next(void) 87 { 88 rec_srvid += 1; 89 return rec_srvid; 90 } 91 66 92 /* 67 93 * Recovery database functions … … 81 107 uint32_t hash_size, bool persistent) 82 108 { 109 static char *db_dir_state = NULL; 83 110 struct recdb_context *recdb; 84 111 unsigned int tdb_flags; … … 89 116 } 90 117 118 if (db_dir_state == NULL) { 119 db_dir_state = getenv("CTDB_DBDIR_STATE"); 120 } 121 91 122 recdb->db_name = db_name; 92 123 recdb->db_id = db_id; 93 124 recdb->db_path = talloc_asprintf(recdb, "%s/recdb.%s", 94 dirname(discard_const(db_path)), 125 db_dir_state != NULL ? 
126 db_dir_state : 127 dirname(discard_const(db_path)), 95 128 db_name); 96 129 if (recdb->db_path == NULL) { … … 113 146 } 114 147 148 static uint32_t recdb_id(struct recdb_context *recdb) 149 { 150 return recdb->db_id; 151 } 152 115 153 static const char *recdb_name(struct recdb_context *recdb) 116 154 { 117 155 return recdb->db_name; 156 } 157 158 static const char *recdb_path(struct recdb_context *recdb) 159 { 160 return recdb->db_path; 161 } 162 163 static struct tdb_context *recdb_tdb(struct recdb_context *recdb) 164 { 165 return recdb->db->tdb; 166 } 167 168 static bool recdb_persistent(struct recdb_context *recdb) 169 { 170 return recdb->persistent; 118 171 } 119 172 … … 141 194 142 195 /* fetch the existing record, if any */ 143 prev_data = tdb_fetch( state->recdb->db->tdb, key);196 prev_data = tdb_fetch(recdb_tdb(state->recdb), key); 144 197 145 198 if (prev_data.dptr != NULL) { … … 155 208 } 156 209 157 ret = tdb_store( state->recdb->db->tdb, key, data, TDB_REPLACE);210 ret = tdb_store(recdb_tdb(state->recdb), key, data, TDB_REPLACE); 158 211 if (ret != 0) { 159 212 return -1; … … 179 232 } 180 233 181 struct recdb_traverse_state { 182 struct ctdb_rec_buffer *recbuf; 183 uint32_t pnn; 184 uint32_t reqid; 185 bool persistent; 186 bool failed; 187 }; 188 189 static int recdb_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, 190 void *private_data) 191 { 192 struct recdb_traverse_state *state = 193 (struct recdb_traverse_state *)private_data; 234 /* This function decides which records from recdb are retained */ 235 static int recbuf_filter_add(struct ctdb_rec_buffer *recbuf, bool persistent, 236 uint32_t reqid, uint32_t dmaster, 237 TDB_DATA key, TDB_DATA data) 238 { 194 239 struct ctdb_ltdb_header *header; 195 240 int ret; … … 225 270 * data structures built from the various tdb-level records. 
226 271 */ 227 if (!state->persistent && 228 data.dsize <= sizeof(struct ctdb_ltdb_header)) { 272 if (!persistent && data.dsize <= sizeof(struct ctdb_ltdb_header)) { 229 273 return 0; 230 274 } … … 232 276 /* update the dmaster field to point to us */ 233 277 header = (struct ctdb_ltdb_header *)data.dptr; 234 if (! state->persistent) {235 header->dmaster = state->pnn;278 if (!persistent) { 279 header->dmaster = dmaster; 236 280 header->flags |= CTDB_REC_FLAG_MIGRATED_WITH_DATA; 237 281 } 238 282 239 ret = ctdb_rec_buffer_add(state->recbuf, state->recbuf, state->reqid, 240 NULL, key, data); 283 ret = ctdb_rec_buffer_add(recbuf, recbuf, reqid, NULL, key, data); 284 if (ret != 0) { 285 return ret; 286 } 287 288 return 0; 289 } 290 291 struct recdb_records_traverse_state { 292 struct ctdb_rec_buffer *recbuf; 293 uint32_t dmaster; 294 uint32_t reqid; 295 bool persistent; 296 bool failed; 297 }; 298 299 static int recdb_records_traverse(struct tdb_context *tdb, 300 TDB_DATA key, TDB_DATA data, 301 void *private_data) 302 { 303 struct recdb_records_traverse_state *state = 304 (struct recdb_records_traverse_state *)private_data; 305 int ret; 306 307 ret = recbuf_filter_add(state->recbuf, state->persistent, 308 state->reqid, state->dmaster, key, data); 241 309 if (ret != 0) { 242 310 state->failed = true; … … 248 316 249 317 static struct ctdb_rec_buffer *recdb_records(struct recdb_context *recdb, 250 TALLOC_CTX *mem_ctx, uint32_t pnn) 251 { 252 struct recdb_traverse_state state; 253 int ret; 254 255 state.recbuf = ctdb_rec_buffer_init(mem_ctx, recdb->db_id); 318 TALLOC_CTX *mem_ctx, 319 uint32_t dmaster) 320 { 321 struct recdb_records_traverse_state state; 322 int ret; 323 324 state.recbuf = ctdb_rec_buffer_init(mem_ctx, recdb_id(recdb)); 256 325 if (state.recbuf == NULL) { 257 326 return NULL; 258 327 } 259 state. 
pnn = pnn;328 state.dmaster = dmaster; 260 329 state.reqid = 0; 261 state.persistent = recdb ->persistent;330 state.persistent = recdb_persistent(recdb); 262 331 state.failed = false; 263 332 264 ret = tdb_traverse_read(recdb->db->tdb, recdb_traverse, &state); 333 ret = tdb_traverse_read(recdb_tdb(recdb), recdb_records_traverse, 334 &state); 335 if (ret == -1 || state.failed) { 336 LOG("Failed to marshall recovery records for %s\n", 337 recdb_name(recdb)); 338 TALLOC_FREE(state.recbuf); 339 return NULL; 340 } 341 342 return state.recbuf; 343 } 344 345 struct recdb_file_traverse_state { 346 struct ctdb_rec_buffer *recbuf; 347 struct recdb_context *recdb; 348 TALLOC_CTX *mem_ctx; 349 uint32_t dmaster; 350 uint32_t reqid; 351 bool persistent; 352 bool failed; 353 int fd; 354 int max_size; 355 int num_buffers; 356 }; 357 358 static int recdb_file_traverse(struct tdb_context *tdb, 359 TDB_DATA key, TDB_DATA data, 360 void *private_data) 361 { 362 struct recdb_file_traverse_state *state = 363 (struct recdb_file_traverse_state *)private_data; 364 int ret; 365 366 ret = recbuf_filter_add(state->recbuf, state->persistent, 367 state->reqid, state->dmaster, key, data); 368 if (ret != 0) { 369 state->failed = true; 370 return ret; 371 } 372 373 if (ctdb_rec_buffer_len(state->recbuf) > state->max_size) { 374 ret = ctdb_rec_buffer_write(state->recbuf, state->fd); 375 if (ret != 0) { 376 LOG("Failed to collect recovery records for %s\n", 377 recdb_name(state->recdb)); 378 state->failed = true; 379 return ret; 380 } 381 382 state->num_buffers += 1; 383 384 TALLOC_FREE(state->recbuf); 385 state->recbuf = ctdb_rec_buffer_init(state->mem_ctx, 386 recdb_id(state->recdb)); 387 if (state->recbuf == NULL) { 388 state->failed = true; 389 return ENOMEM; 390 } 391 } 392 393 return 0; 394 } 395 396 static int recdb_file(struct recdb_context *recdb, TALLOC_CTX *mem_ctx, 397 uint32_t dmaster, int fd, int max_size) 398 { 399 struct recdb_file_traverse_state state; 400 int ret; 401 402 
state.recbuf = ctdb_rec_buffer_init(mem_ctx, recdb_id(recdb)); 403 if (state.recbuf == NULL) { 404 return -1; 405 } 406 state.recdb = recdb; 407 state.mem_ctx = mem_ctx; 408 state.dmaster = dmaster; 409 state.reqid = 0; 410 state.persistent = recdb_persistent(recdb); 411 state.failed = false; 412 state.fd = fd; 413 state.max_size = max_size; 414 state.num_buffers = 0; 415 416 ret = tdb_traverse_read(recdb_tdb(recdb), recdb_file_traverse, &state); 265 417 if (ret == -1 || state.failed) { 266 418 TALLOC_FREE(state.recbuf); 419 return -1; 420 } 421 422 ret = ctdb_rec_buffer_write(state.recbuf, fd); 423 if (ret != 0) { 424 LOG("Failed to collect recovery records for %s\n", 425 recdb_name(recdb)); 426 TALLOC_FREE(state.recbuf); 427 return -1; 428 } 429 state.num_buffers += 1; 430 431 LOG("Wrote %d buffers of recovery records for %s\n", 432 state.num_buffers, recdb_name(recdb)); 433 434 return state.num_buffers; 435 } 436 437 /* 438 * Pull database from a single node 439 */ 440 441 struct pull_database_state { 442 struct tevent_context *ev; 443 struct ctdb_client_context *client; 444 struct recdb_context *recdb; 445 uint32_t pnn; 446 uint64_t srvid; 447 int num_records; 448 }; 449 450 static void pull_database_handler(uint64_t srvid, TDB_DATA data, 451 void *private_data); 452 static void pull_database_register_done(struct tevent_req *subreq); 453 static void pull_database_old_done(struct tevent_req *subreq); 454 static void pull_database_unregister_done(struct tevent_req *subreq); 455 static void pull_database_new_done(struct tevent_req *subreq); 456 457 static struct tevent_req *pull_database_send( 458 TALLOC_CTX *mem_ctx, 459 struct tevent_context *ev, 460 struct ctdb_client_context *client, 461 uint32_t pnn, uint32_t caps, 462 struct recdb_context *recdb) 463 { 464 struct tevent_req *req, *subreq; 465 struct pull_database_state *state; 466 struct ctdb_req_control request; 467 468 req = tevent_req_create(mem_ctx, &state, struct pull_database_state); 469 if (req == 
NULL) { 267 470 return NULL; 268 471 } 269 472 270 return state.recbuf; 473 state->ev = ev; 474 state->client = client; 475 state->recdb = recdb; 476 state->pnn = pnn; 477 state->srvid = srvid_next(); 478 479 if (caps & CTDB_CAP_FRAGMENTED_CONTROLS) { 480 subreq = ctdb_client_set_message_handler_send( 481 state, state->ev, state->client, 482 state->srvid, pull_database_handler, 483 req); 484 if (tevent_req_nomem(subreq, req)) { 485 return tevent_req_post(req, ev); 486 } 487 488 tevent_req_set_callback(subreq, pull_database_register_done, 489 req); 490 491 } else { 492 struct ctdb_pulldb pulldb; 493 494 pulldb.db_id = recdb_id(recdb); 495 pulldb.lmaster = CTDB_LMASTER_ANY; 496 497 ctdb_req_control_pull_db(&request, &pulldb); 498 subreq = ctdb_client_control_send(state, state->ev, 499 state->client, 500 pnn, TIMEOUT(), 501 &request); 502 if (tevent_req_nomem(subreq, req)) { 503 return tevent_req_post(req, ev); 504 } 505 tevent_req_set_callback(subreq, pull_database_old_done, req); 506 } 507 508 return req; 509 } 510 511 static void pull_database_handler(uint64_t srvid, TDB_DATA data, 512 void *private_data) 513 { 514 struct tevent_req *req = talloc_get_type_abort( 515 private_data, struct tevent_req); 516 struct pull_database_state *state = tevent_req_data( 517 req, struct pull_database_state); 518 struct ctdb_rec_buffer *recbuf; 519 int ret; 520 bool status; 521 522 if (srvid != state->srvid) { 523 return; 524 } 525 526 ret = ctdb_rec_buffer_pull(data.dptr, data.dsize, state, &recbuf); 527 if (ret != 0) { 528 LOG("Invalid data received for DB_PULL messages\n"); 529 return; 530 } 531 532 if (recbuf->db_id != recdb_id(state->recdb)) { 533 talloc_free(recbuf); 534 LOG("Invalid dbid:%08x for DB_PULL messages for %s\n", 535 recbuf->db_id, recdb_name(state->recdb)); 536 return; 537 } 538 539 status = recdb_add(state->recdb, ctdb_client_pnn(state->client), 540 recbuf); 541 if (! 
status) { 542 talloc_free(recbuf); 543 LOG("Failed to add records to recdb for %s\n", 544 recdb_name(state->recdb)); 545 return; 546 } 547 548 state->num_records += recbuf->count; 549 talloc_free(recbuf); 550 } 551 552 static void pull_database_register_done(struct tevent_req *subreq) 553 { 554 struct tevent_req *req = tevent_req_callback_data( 555 subreq, struct tevent_req); 556 struct pull_database_state *state = tevent_req_data( 557 req, struct pull_database_state); 558 struct ctdb_req_control request; 559 struct ctdb_pulldb_ext pulldb_ext; 560 int ret; 561 bool status; 562 563 status = ctdb_client_set_message_handler_recv(subreq, &ret); 564 TALLOC_FREE(subreq); 565 if (! status) { 566 LOG("failed to set message handler for DB_PULL for %s\n", 567 recdb_name(state->recdb)); 568 tevent_req_error(req, ret); 569 return; 570 } 571 572 pulldb_ext.db_id = recdb_id(state->recdb); 573 pulldb_ext.lmaster = CTDB_LMASTER_ANY; 574 pulldb_ext.srvid = state->srvid; 575 576 ctdb_req_control_db_pull(&request, &pulldb_ext); 577 subreq = ctdb_client_control_send(state, state->ev, state->client, 578 state->pnn, TIMEOUT(), &request); 579 if (tevent_req_nomem(subreq, req)) { 580 return; 581 } 582 tevent_req_set_callback(subreq, pull_database_new_done, req); 583 } 584 585 static void pull_database_old_done(struct tevent_req *subreq) 586 { 587 struct tevent_req *req = tevent_req_callback_data( 588 subreq, struct tevent_req); 589 struct pull_database_state *state = tevent_req_data( 590 req, struct pull_database_state); 591 struct ctdb_reply_control *reply; 592 struct ctdb_rec_buffer *recbuf; 593 int ret; 594 bool status; 595 596 status = ctdb_client_control_recv(subreq, &ret, state, &reply); 597 TALLOC_FREE(subreq); 598 if (! 
status) { 599 LOG("control PULL_DB failed for %s on node %u, ret=%d\n", 600 recdb_name(state->recdb), state->pnn, ret); 601 tevent_req_error(req, ret); 602 return; 603 } 604 605 ret = ctdb_reply_control_pull_db(reply, state, &recbuf); 606 talloc_free(reply); 607 if (ret != 0) { 608 tevent_req_error(req, ret); 609 return; 610 } 611 612 status = recdb_add(state->recdb, ctdb_client_pnn(state->client), 613 recbuf); 614 if (! status) { 615 talloc_free(recbuf); 616 tevent_req_error(req, EIO); 617 return; 618 } 619 620 state->num_records = recbuf->count; 621 talloc_free(recbuf); 622 623 LOG("Pulled %d records for db %s from node %d\n", 624 state->num_records, recdb_name(state->recdb), state->pnn); 625 626 tevent_req_done(req); 627 } 628 629 static void pull_database_new_done(struct tevent_req *subreq) 630 { 631 struct tevent_req *req = tevent_req_callback_data( 632 subreq, struct tevent_req); 633 struct pull_database_state *state = tevent_req_data( 634 req, struct pull_database_state); 635 struct ctdb_reply_control *reply; 636 uint32_t num_records; 637 int ret; 638 bool status; 639 640 status = ctdb_client_control_recv(subreq, &ret, state, &reply); 641 TALLOC_FREE(subreq); 642 if (! 
status) { 643 LOG("control DB_PULL failed for %s on node %u, ret=%d\n", 644 recdb_name(state->recdb), state->pnn, ret); 645 tevent_req_error(req, ret); 646 return; 647 } 648 649 ret = ctdb_reply_control_db_pull(reply, &num_records); 650 talloc_free(reply); 651 if (num_records != state->num_records) { 652 LOG("mismatch (%u != %u) in DB_PULL records for %s\n", 653 num_records, state->num_records, recdb_name(state->recdb)); 654 tevent_req_error(req, EIO); 655 return; 656 } 657 658 LOG("Pulled %d records for db %s from node %d\n", 659 state->num_records, recdb_name(state->recdb), state->pnn); 660 661 subreq = ctdb_client_remove_message_handler_send( 662 state, state->ev, state->client, 663 state->srvid, req); 664 if (tevent_req_nomem(subreq, req)) { 665 return; 666 } 667 tevent_req_set_callback(subreq, pull_database_unregister_done, req); 668 } 669 670 static void pull_database_unregister_done(struct tevent_req *subreq) 671 { 672 struct tevent_req *req = tevent_req_callback_data( 673 subreq, struct tevent_req); 674 struct pull_database_state *state = tevent_req_data( 675 req, struct pull_database_state); 676 int ret; 677 bool status; 678 679 status = ctdb_client_remove_message_handler_recv(subreq, &ret); 680 TALLOC_FREE(subreq); 681 if (! 
status) { 682 LOG("failed to remove message handler for DB_PULL for %s\n", 683 recdb_name(state->recdb)); 684 tevent_req_error(req, ret); 685 return; 686 } 687 688 tevent_req_done(req); 689 } 690 691 static bool pull_database_recv(struct tevent_req *req, int *perr) 692 { 693 return generic_recv(req, perr); 694 } 695 696 /* 697 * Push database to specified nodes (old style) 698 */ 699 700 struct push_database_old_state { 701 struct tevent_context *ev; 702 struct ctdb_client_context *client; 703 struct recdb_context *recdb; 704 uint32_t *pnn_list; 705 int count; 706 struct ctdb_rec_buffer *recbuf; 707 int index; 708 }; 709 710 static void push_database_old_push_done(struct tevent_req *subreq); 711 712 static struct tevent_req *push_database_old_send( 713 TALLOC_CTX *mem_ctx, 714 struct tevent_context *ev, 715 struct ctdb_client_context *client, 716 uint32_t *pnn_list, int count, 717 struct recdb_context *recdb) 718 { 719 struct tevent_req *req, *subreq; 720 struct push_database_old_state *state; 721 struct ctdb_req_control request; 722 uint32_t pnn; 723 724 req = tevent_req_create(mem_ctx, &state, 725 struct push_database_old_state); 726 if (req == NULL) { 727 return NULL; 728 } 729 730 state->ev = ev; 731 state->client = client; 732 state->recdb = recdb; 733 state->pnn_list = pnn_list; 734 state->count = count; 735 state->index = 0; 736 737 state->recbuf = recdb_records(recdb, state, 738 ctdb_client_pnn(client)); 739 if (tevent_req_nomem(state->recbuf, req)) { 740 return tevent_req_post(req, ev); 741 } 742 743 pnn = state->pnn_list[state->index]; 744 745 ctdb_req_control_push_db(&request, state->recbuf); 746 subreq = ctdb_client_control_send(state, ev, client, pnn, 747 TIMEOUT(), &request); 748 if (tevent_req_nomem(subreq, req)) { 749 return tevent_req_post(req, ev); 750 } 751 tevent_req_set_callback(subreq, push_database_old_push_done, req); 752 753 return req; 754 } 755 756 static void push_database_old_push_done(struct tevent_req *subreq) 757 { 758 struct 
tevent_req *req = tevent_req_callback_data( 759 subreq, struct tevent_req); 760 struct push_database_old_state *state = tevent_req_data( 761 req, struct push_database_old_state); 762 struct ctdb_req_control request; 763 uint32_t pnn; 764 int ret; 765 bool status; 766 767 status = ctdb_client_control_recv(subreq, &ret, NULL, NULL); 768 TALLOC_FREE(subreq); 769 if (! status) { 770 LOG("control PUSH_DB failed for db %s on node %u, ret=%d\n", 771 recdb_name(state->recdb), state->pnn_list[state->index], 772 ret); 773 tevent_req_error(req, ret); 774 return; 775 } 776 777 state->index += 1; 778 if (state->index == state->count) { 779 TALLOC_FREE(state->recbuf); 780 tevent_req_done(req); 781 return; 782 } 783 784 pnn = state->pnn_list[state->index]; 785 786 ctdb_req_control_push_db(&request, state->recbuf); 787 subreq = ctdb_client_control_send(state, state->ev, state->client, 788 pnn, TIMEOUT(), &request); 789 if (tevent_req_nomem(subreq, req)) { 790 return; 791 } 792 tevent_req_set_callback(subreq, push_database_old_push_done, req); 793 } 794 795 static bool push_database_old_recv(struct tevent_req *req, int *perr) 796 { 797 return generic_recv(req, perr); 798 } 799 800 /* 801 * Push database to specified nodes (new style) 802 */ 803 804 struct push_database_new_state { 805 struct tevent_context *ev; 806 struct ctdb_client_context *client; 807 struct recdb_context *recdb; 808 uint32_t *pnn_list; 809 int count; 810 uint64_t srvid; 811 uint32_t dmaster; 812 int fd; 813 int num_buffers; 814 int num_buffers_sent; 815 int num_records; 816 }; 817 818 static void push_database_new_started(struct tevent_req *subreq); 819 static void push_database_new_send_msg(struct tevent_req *req); 820 static void push_database_new_send_done(struct tevent_req *subreq); 821 static void push_database_new_confirmed(struct tevent_req *subreq); 822 823 static struct tevent_req *push_database_new_send( 824 TALLOC_CTX *mem_ctx, 825 struct tevent_context *ev, 826 struct ctdb_client_context *client, 
827 uint32_t *pnn_list, int count, 828 struct recdb_context *recdb, 829 int max_size) 830 { 831 struct tevent_req *req, *subreq; 832 struct push_database_new_state *state; 833 struct ctdb_req_control request; 834 struct ctdb_pulldb_ext pulldb_ext; 835 char *filename; 836 off_t offset; 837 838 req = tevent_req_create(mem_ctx, &state, 839 struct push_database_new_state); 840 if (req == NULL) { 841 return NULL; 842 } 843 844 state->ev = ev; 845 state->client = client; 846 state->recdb = recdb; 847 state->pnn_list = pnn_list; 848 state->count = count; 849 850 state->srvid = srvid_next(); 851 state->dmaster = ctdb_client_pnn(client); 852 state->num_buffers_sent = 0; 853 state->num_records = 0; 854 855 filename = talloc_asprintf(state, "%s.dat", recdb_path(recdb)); 856 if (tevent_req_nomem(filename, req)) { 857 return tevent_req_post(req, ev); 858 } 859 860 state->fd = open(filename, O_RDWR|O_CREAT, 0644); 861 if (state->fd == -1) { 862 tevent_req_error(req, errno); 863 return tevent_req_post(req, ev); 864 } 865 unlink(filename); 866 talloc_free(filename); 867 868 state->num_buffers = recdb_file(recdb, state, state->dmaster, 869 state->fd, max_size); 870 if (state->num_buffers == -1) { 871 tevent_req_error(req, ENOMEM); 872 return tevent_req_post(req, ev); 873 } 874 875 offset = lseek(state->fd, 0, SEEK_SET); 876 if (offset != 0) { 877 tevent_req_error(req, EIO); 878 return tevent_req_post(req, ev); 879 } 880 881 pulldb_ext.db_id = recdb_id(recdb); 882 pulldb_ext.srvid = state->srvid; 883 884 ctdb_req_control_db_push_start(&request, &pulldb_ext); 885 subreq = ctdb_client_control_multi_send(state, ev, client, 886 pnn_list, count, 887 TIMEOUT(), &request); 888 if (tevent_req_nomem(subreq, req)) { 889 return tevent_req_post(req, ev); 890 } 891 tevent_req_set_callback(subreq, push_database_new_started, req); 892 893 return req; 894 } 895 896 static void push_database_new_started(struct tevent_req *subreq) 897 { 898 struct tevent_req *req = tevent_req_callback_data( 899 
subreq, struct tevent_req); 900 struct push_database_new_state *state = tevent_req_data( 901 req, struct push_database_new_state); 902 int *err_list; 903 int ret; 904 bool status; 905 906 status = ctdb_client_control_multi_recv(subreq, &ret, state, 907 &err_list, NULL); 908 TALLOC_FREE(subreq); 909 if (! status) { 910 int ret2; 911 uint32_t pnn; 912 913 ret2 = ctdb_client_control_multi_error(state->pnn_list, 914 state->count, 915 err_list, &pnn); 916 if (ret2 != 0) { 917 LOG("control DB_PUSH_START failed for db %s " 918 "on node %u, ret=%d\n", 919 recdb_name(state->recdb), pnn, ret2); 920 } else { 921 LOG("control DB_PUSH_START failed for db %s, ret=%d\n", 922 recdb_name(state->recdb), ret); 923 } 924 talloc_free(err_list); 925 926 tevent_req_error(req, ret); 927 return; 928 } 929 930 push_database_new_send_msg(req); 931 } 932 933 static void push_database_new_send_msg(struct tevent_req *req) 934 { 935 struct push_database_new_state *state = tevent_req_data( 936 req, struct push_database_new_state); 937 struct tevent_req *subreq; 938 struct ctdb_rec_buffer *recbuf; 939 struct ctdb_req_message message; 940 TDB_DATA data; 941 int ret; 942 943 if (state->num_buffers_sent == state->num_buffers) { 944 struct ctdb_req_control request; 945 946 ctdb_req_control_db_push_confirm(&request, 947 recdb_id(state->recdb)); 948 subreq = ctdb_client_control_multi_send(state, state->ev, 949 state->client, 950 state->pnn_list, 951 state->count, 952 TIMEOUT(), &request); 953 if (tevent_req_nomem(subreq, req)) { 954 return; 955 } 956 tevent_req_set_callback(subreq, push_database_new_confirmed, 957 req); 958 return; 959 } 960 961 ret = ctdb_rec_buffer_read(state->fd, state, &recbuf); 962 if (ret != 0) { 963 tevent_req_error(req, ret); 964 return; 965 } 966 967 data.dsize = ctdb_rec_buffer_len(recbuf); 968 data.dptr = talloc_size(state, data.dsize); 969 if (tevent_req_nomem(data.dptr, req)) { 970 return; 971 } 972 973 ctdb_rec_buffer_push(recbuf, data.dptr); 974 975 message.srvid = 
state->srvid; 976 message.data.data = data; 977 978 LOG("Pushing buffer %d with %d records for %s\n", 979 state->num_buffers_sent, recbuf->count, recdb_name(state->recdb)); 980 981 subreq = ctdb_client_message_multi_send(state, state->ev, 982 state->client, 983 state->pnn_list, state->count, 984 &message); 985 if (tevent_req_nomem(subreq, req)) { 986 return; 987 } 988 tevent_req_set_callback(subreq, push_database_new_send_done, req); 989 990 state->num_records += recbuf->count; 991 992 talloc_free(data.dptr); 993 talloc_free(recbuf); 994 } 995 996 static void push_database_new_send_done(struct tevent_req *subreq) 997 { 998 struct tevent_req *req = tevent_req_callback_data( 999 subreq, struct tevent_req); 1000 struct push_database_new_state *state = tevent_req_data( 1001 req, struct push_database_new_state); 1002 bool status; 1003 int ret; 1004 1005 status = ctdb_client_message_multi_recv(subreq, &ret, NULL, NULL); 1006 TALLOC_FREE(subreq); 1007 if (! status) { 1008 LOG("Sending recovery records failed for %s\n", 1009 recdb_name(state->recdb)); 1010 tevent_req_error(req, ret); 1011 return; 1012 } 1013 1014 state->num_buffers_sent += 1; 1015 1016 push_database_new_send_msg(req); 1017 } 1018 1019 static void push_database_new_confirmed(struct tevent_req *subreq) 1020 { 1021 struct tevent_req *req = tevent_req_callback_data( 1022 subreq, struct tevent_req); 1023 struct push_database_new_state *state = tevent_req_data( 1024 req, struct push_database_new_state); 1025 struct ctdb_reply_control **reply; 1026 int *err_list; 1027 bool status; 1028 int ret, i; 1029 uint32_t num_records; 1030 1031 status = ctdb_client_control_multi_recv(subreq, &ret, state, 1032 &err_list, &reply); 1033 TALLOC_FREE(subreq); 1034 if (! 
status) { 1035 int ret2; 1036 uint32_t pnn; 1037 1038 ret2 = ctdb_client_control_multi_error(state->pnn_list, 1039 state->count, err_list, 1040 &pnn); 1041 if (ret2 != 0) { 1042 LOG("control DB_PUSH_CONFIRM failed for %s on node %u," 1043 " ret=%d\n", recdb_name(state->recdb), pnn, ret2); 1044 } else { 1045 LOG("control DB_PUSH_CONFIRM failed for %s, ret=%d\n", 1046 recdb_name(state->recdb), ret); 1047 } 1048 tevent_req_error(req, ret); 1049 return; 1050 } 1051 1052 for (i=0; i<state->count; i++) { 1053 ret = ctdb_reply_control_db_push_confirm(reply[i], 1054 &num_records); 1055 if (ret != 0) { 1056 tevent_req_error(req, EPROTO); 1057 return; 1058 } 1059 1060 if (num_records != state->num_records) { 1061 LOG("Node %u received %d of %d records for %s\n", 1062 state->pnn_list[i], num_records, 1063 state->num_records, recdb_name(state->recdb)); 1064 tevent_req_error(req, EPROTO); 1065 return; 1066 } 1067 } 1068 1069 talloc_free(reply); 1070 1071 LOG("Pushed %d records for db %s\n", 1072 state->num_records, recdb_name(state->recdb)); 1073 1074 tevent_req_done(req); 1075 } 1076 1077 static bool push_database_new_recv(struct tevent_req *req, int *perr) 1078 { 1079 return generic_recv(req, perr); 1080 } 1081 1082 /* 1083 * wrapper for push_database_old and push_database_new 1084 */ 1085 1086 struct push_database_state { 1087 bool old_done, new_done; 1088 }; 1089 1090 static void push_database_old_done(struct tevent_req *subreq); 1091 static void push_database_new_done(struct tevent_req *subreq); 1092 1093 static struct tevent_req *push_database_send( 1094 TALLOC_CTX *mem_ctx, 1095 struct tevent_context *ev, 1096 struct ctdb_client_context *client, 1097 uint32_t *pnn_list, int count, uint32_t *caps, 1098 struct ctdb_tunable_list *tun_list, 1099 struct recdb_context *recdb) 1100 { 1101 struct tevent_req *req, *subreq; 1102 struct push_database_state *state; 1103 uint32_t *old_list, *new_list; 1104 int old_count, new_count; 1105 int i; 1106 1107 req = 
tevent_req_create(mem_ctx, &state, struct push_database_state); 1108 if (req == NULL) { 1109 return NULL; 1110 } 1111 1112 state->old_done = false; 1113 state->new_done = false; 1114 1115 old_count = 0; 1116 new_count = 0; 1117 old_list = talloc_array(state, uint32_t, count); 1118 new_list = talloc_array(state, uint32_t, count); 1119 if (tevent_req_nomem(old_list, req) || 1120 tevent_req_nomem(new_list,req)) { 1121 return tevent_req_post(req, ev); 1122 } 1123 1124 for (i=0; i<count; i++) { 1125 uint32_t pnn = pnn_list[i]; 1126 1127 if (caps[pnn] & CTDB_CAP_FRAGMENTED_CONTROLS) { 1128 new_list[new_count] = pnn; 1129 new_count += 1; 1130 } else { 1131 old_list[old_count] = pnn; 1132 old_count += 1; 1133 } 1134 } 1135 1136 if (old_count > 0) { 1137 subreq = push_database_old_send(state, ev, client, 1138 old_list, old_count, recdb); 1139 if (tevent_req_nomem(subreq, req)) { 1140 return tevent_req_post(req, ev); 1141 } 1142 tevent_req_set_callback(subreq, push_database_old_done, req); 1143 } else { 1144 state->old_done = true; 1145 } 1146 1147 if (new_count > 0) { 1148 subreq = push_database_new_send(state, ev, client, 1149 new_list, new_count, recdb, 1150 tun_list->rec_buffer_size_limit); 1151 if (tevent_req_nomem(subreq, req)) { 1152 return tevent_req_post(req, ev); 1153 } 1154 tevent_req_set_callback(subreq, push_database_new_done, req); 1155 } else { 1156 state->new_done = true; 1157 } 1158 1159 return req; 1160 } 1161 1162 static void push_database_old_done(struct tevent_req *subreq) 1163 { 1164 struct tevent_req *req = tevent_req_callback_data( 1165 subreq, struct tevent_req); 1166 struct push_database_state *state = tevent_req_data( 1167 req, struct push_database_state); 1168 bool status; 1169 int ret; 1170 1171 status = push_database_old_recv(subreq, &ret); 1172 if (! 
status) { 1173 tevent_req_error(req, ret); 1174 return; 1175 } 1176 1177 state->old_done = true; 1178 1179 if (state->old_done && state->new_done) { 1180 tevent_req_done(req); 1181 } 1182 } 1183 1184 static void push_database_new_done(struct tevent_req *subreq) 1185 { 1186 struct tevent_req *req = tevent_req_callback_data( 1187 subreq, struct tevent_req); 1188 struct push_database_state *state = tevent_req_data( 1189 req, struct push_database_state); 1190 bool status; 1191 int ret; 1192 1193 status = push_database_new_recv(subreq, &ret); 1194 if (! status) { 1195 tevent_req_error(req, ret); 1196 return; 1197 } 1198 1199 state->new_done = true; 1200 1201 if (state->old_done && state->new_done) { 1202 tevent_req_done(req); 1203 } 1204 } 1205 1206 static bool push_database_recv(struct tevent_req *req, int *perr) 1207 { 1208 return generic_recv(req, perr); 271 1209 } 272 1210 … … 280 1218 uint32_t *pnn_list; 281 1219 int count; 1220 uint32_t *caps; 1221 uint32_t *ban_credits; 282 1222 uint32_t db_id; 283 1223 struct recdb_context *recdb; … … 292 1232 struct tevent_context *ev, 293 1233 struct ctdb_client_context *client, 294 uint32_t *pnn_list, int count, 295 uint32_t db_id, struct recdb_context *recdb) 1234 uint32_t *pnn_list, int count, uint32_t *caps, 1235 uint32_t *ban_credits, uint32_t db_id, 1236 struct recdb_context *recdb) 296 1237 { 297 1238 struct tevent_req *req, *subreq; … … 309 1250 state->pnn_list = pnn_list; 310 1251 state->count = count; 1252 state->caps = caps; 1253 state->ban_credits = ban_credits; 311 1254 state->db_id = db_id; 312 1255 state->recdb = recdb; … … 332 1275 req, struct collect_highseqnum_db_state); 333 1276 struct ctdb_reply_control **reply; 334 struct ctdb_req_control request;335 struct ctdb_pulldb pulldb;336 1277 int *err_list; 337 1278 bool status; … … 380 1321 recdb_name(state->recdb), state->max_pnn, max_seqnum); 381 1322 382 pulldb.db_id = state->db_id; 383 pulldb.lmaster = CTDB_LMASTER_ANY; 384 385 
ctdb_req_control_pull_db(&request, &pulldb); 386 subreq = ctdb_client_control_send(state, state->ev, state->client, 387 state->max_pnn, TIMEOUT(), &request); 1323 subreq = pull_database_send(state, state->ev, state->client, 1324 state->max_pnn, 1325 state->caps[state->max_pnn], 1326 state->recdb); 388 1327 if (tevent_req_nomem(subreq, req)) { 389 1328 return; … … 399 1338 struct collect_highseqnum_db_state *state = tevent_req_data( 400 1339 req, struct collect_highseqnum_db_state); 401 struct ctdb_reply_control *reply; 402 struct ctdb_rec_buffer *recbuf; 403 int ret; 404 bool status; 405 406 status = ctdb_client_control_recv(subreq, &ret, state, &reply); 407 TALLOC_FREE(subreq); 408 if (! status) { 409 LOG("control PULL_DB failed for %s on node %u, ret=%d\n", 410 recdb_name(state->recdb), state->max_pnn, ret); 411 tevent_req_error(req, ret); 412 return; 413 } 414 415 ret = ctdb_reply_control_pull_db(reply, state, &recbuf); 416 if (ret != 0) { 417 tevent_req_error(req, EPROTO); 418 return; 419 } 420 421 talloc_free(reply); 422 423 ret = recdb_add(state->recdb, ctdb_client_pnn(state->client), recbuf); 424 talloc_free(recbuf); 425 if (! ret) { 426 tevent_req_error(req, EIO); 1340 int ret; 1341 bool status; 1342 1343 status = pull_database_recv(subreq, &ret); 1344 TALLOC_FREE(subreq); 1345 if (! 
status) { 1346 state->ban_credits[state->max_pnn] += 1; 1347 tevent_req_error(req, ret); 427 1348 return; 428 1349 } … … 433 1354 static bool collect_highseqnum_db_recv(struct tevent_req *req, int *perr) 434 1355 { 435 int err; 436 437 if (tevent_req_is_unix_error(req, &err)) { 438 if (perr != NULL) { 439 *perr = err; 440 } 441 return false; 442 } 443 444 return true; 1356 return generic_recv(req, perr); 445 1357 } 446 1358 … … 454 1366 uint32_t *pnn_list; 455 1367 int count; 1368 uint32_t *caps; 1369 uint32_t *ban_credits; 456 1370 uint32_t db_id; 457 1371 struct recdb_context *recdb; … … 466 1380 struct tevent_context *ev, 467 1381 struct ctdb_client_context *client, 468 uint32_t *pnn_list, int count, 469 uint32_t db_id, struct recdb_context *recdb) 1382 uint32_t *pnn_list, int count, uint32_t *caps, 1383 uint32_t *ban_credits, uint32_t db_id, 1384 struct recdb_context *recdb) 470 1385 { 471 1386 struct tevent_req *req, *subreq; 472 1387 struct collect_all_db_state *state; 473 struct ctdb_req_control request;1388 uint32_t pnn; 474 1389 475 1390 req = tevent_req_create(mem_ctx, &state, … … 483 1398 state->pnn_list = pnn_list; 484 1399 state->count = count; 1400 state->caps = caps; 1401 state->ban_credits = ban_credits; 485 1402 state->db_id = db_id; 486 1403 state->recdb = recdb; 487 488 state->pulldb.db_id = db_id;489 state->pulldb.lmaster = CTDB_LMASTER_ANY;490 491 1404 state->index = 0; 492 1405 493 ctdb_req_control_pull_db(&request, &state->pulldb); 494 subreq = ctdb_client_control_send(state, ev, client, 495 state->pnn_list[state->index], 496 TIMEOUT(), &request); 1406 pnn = state->pnn_list[state->index]; 1407 1408 subreq = pull_database_send(state, ev, client, pnn, caps[pnn], recdb); 497 1409 if (tevent_req_nomem(subreq, req)) { 498 1410 return tevent_req_post(req, ev); … … 509 1421 struct collect_all_db_state *state = tevent_req_data( 510 1422 req, struct collect_all_db_state); 511 struct ctdb_reply_control *reply; 512 struct ctdb_req_control request; 513 
struct ctdb_rec_buffer *recbuf; 514 int ret; 515 bool status; 516 517 status = ctdb_client_control_recv(subreq, &ret, state, &reply); 518 TALLOC_FREE(subreq); 519 if (! status) { 520 LOG("control PULL_DB failed for %s from node %u, ret=%d\n", 521 recdb_name(state->recdb), state->pnn_list[state->index], 522 ret); 523 tevent_req_error(req, ret); 524 return; 525 } 526 527 ret = ctdb_reply_control_pull_db(reply, state, &recbuf); 528 if (ret != 0) { 529 LOG("control PULL_DB failed for %s, ret=%d\n", 530 recdb_name(state->recdb), ret); 531 tevent_req_error(req, EPROTO); 532 return; 533 } 534 535 talloc_free(reply); 536 537 status = recdb_add(state->recdb, ctdb_client_pnn(state->client), recbuf); 538 talloc_free(recbuf); 539 if (! status) { 540 tevent_req_error(req, EIO); 1423 uint32_t pnn; 1424 int ret; 1425 bool status; 1426 1427 status = pull_database_recv(subreq, &ret); 1428 TALLOC_FREE(subreq); 1429 if (! status) { 1430 pnn = state->pnn_list[state->index]; 1431 state->ban_credits[pnn] += 1; 1432 tevent_req_error(req, ret); 541 1433 return; 542 1434 } … … 548 1440 } 549 1441 550 ctdb_req_control_pull_db(&request, &state->pulldb); 551 subreq = ctdb_client_control_send(state, state->ev, state->client, 552 state->pnn_list[state->index], 553 TIMEOUT(), &request); 1442 pnn = state->pnn_list[state->index]; 1443 subreq = pull_database_send(state, state->ev, state->client, 1444 pnn, state->caps[pnn], state->recdb); 554 1445 if (tevent_req_nomem(subreq, req)) { 555 1446 return; … … 560 1451 static bool collect_all_db_recv(struct tevent_req *req, int *perr) 561 1452 { 562 int err; 563 564 if (tevent_req_is_unix_error(req, &err)) { 565 if (perr != NULL) { 566 *perr = err; 567 } 568 return false; 569 } 570 571 return true; 1453 return generic_recv(req, perr); 572 1454 } 573 1455 … … 592 1474 uint32_t *pnn_list; 593 1475 int count; 1476 uint32_t *caps; 1477 uint32_t *ban_credits; 594 1478 uint32_t db_id; 595 1479 bool persistent; … … 600 1484 const char *db_name, *db_path; 601 
1485 struct recdb_context *recdb; 602 struct ctdb_rec_buffer *recbuf;603 604 1486 }; 605 1487 … … 619 1501 struct ctdb_tunable_list *tun_list, 620 1502 uint32_t *pnn_list, int count, 1503 uint32_t *caps, 1504 uint32_t *ban_credits, 621 1505 uint32_t generation, 622 1506 uint32_t db_id, bool persistent) … … 636 1520 state->pnn_list = pnn_list; 637 1521 state->count = count; 1522 state->caps = caps; 1523 state->ban_credits = ban_credits; 638 1524 state->db_id = db_id; 639 1525 state->persistent = persistent; … … 820 1706 subreq = collect_highseqnum_db_send( 821 1707 state, state->ev, state->client, 822 state->pnn_list, state->count, 823 state->db_id, state->recdb); 1708 state->pnn_list, state->count, state->caps, 1709 state->ban_credits, state->db_id, 1710 state->recdb); 824 1711 } else { 825 1712 subreq = collect_all_db_send( 826 1713 state, state->ev, state->client, 827 state->pnn_list, state->count, 828 state->db_id, state->recdb); 1714 state->pnn_list, state->count, state->caps, 1715 state->ban_credits, state->db_id, 1716 state->recdb); 829 1717 } 830 1718 if (tevent_req_nomem(subreq, req)) { … … 872 1760 struct recover_db_state *state = tevent_req_data( 873 1761 req, struct recover_db_state); 874 struct ctdb_req_control request;875 1762 int *err_list; 876 1763 int ret; … … 898 1785 } 899 1786 900 state->recbuf = recdb_records(state->recdb, state, state->destnode); 901 if (tevent_req_nomem(state->recbuf, req)) { 902 return; 903 } 904 905 TALLOC_FREE(state->recdb); 906 907 ctdb_req_control_push_db(&request, state->recbuf); 908 subreq = ctdb_client_control_multi_send(state, state->ev, 909 state->client, 910 state->pnn_list, state->count, 911 TIMEOUT(), &request); 1787 subreq = push_database_send(state, state->ev, state->client, 1788 state->pnn_list, state->count, 1789 state->caps, state->tun_list, 1790 state->recdb); 912 1791 if (tevent_req_nomem(subreq, req)) { 913 1792 return; … … 923 1802 req, struct recover_db_state); 924 1803 struct ctdb_req_control request; 
925 int *err_list; 926 int ret; 927 bool status; 928 929 status = ctdb_client_control_multi_recv(subreq, &ret, NULL, &err_list, 930 NULL); 931 TALLOC_FREE(subreq); 932 if (! status) { 933 int ret2; 934 uint32_t pnn; 935 936 ret2 = ctdb_client_control_multi_error(state->pnn_list, 937 state->count, 938 err_list, &pnn); 939 if (ret2 != 0) { 940 LOG("control PUSHDB failed for db %s on node %u," 941 " ret=%d\n", state->db_name, pnn, ret2); 942 } else { 943 LOG("control PUSHDB failed for db %s, ret=%d\n", 944 state->db_name, ret); 945 } 946 tevent_req_error(req, ret); 947 return; 948 } 949 950 TALLOC_FREE(state->recbuf); 1804 int ret; 1805 bool status; 1806 1807 status = push_database_recv(subreq, &ret); 1808 TALLOC_FREE(subreq); 1809 if (! status) { 1810 tevent_req_error(req, ret); 1811 return; 1812 } 1813 1814 TALLOC_FREE(state->recdb); 951 1815 952 1816 ctdb_req_control_db_transaction_commit(&request, &state->transdb); … … 1040 1904 static bool recover_db_recv(struct tevent_req *req) 1041 1905 { 1042 int err; 1043 1044 if (tevent_req_is_unix_error(req, &err)) { 1045 return false; 1046 } 1047 1048 return true; 1906 return generic_recv(req, NULL); 1049 1907 } 1050 1908 … … 1070 1928 uint32_t *pnn_list; 1071 1929 int count; 1930 uint32_t *caps; 1931 uint32_t *ban_credits; 1072 1932 uint32_t generation; 1073 1933 uint32_t db_id; … … 1084 1944 struct ctdb_tunable_list *tun_list, 1085 1945 uint32_t *pnn_list, int count, 1946 uint32_t *caps, 1947 uint32_t *ban_credits, 1086 1948 uint32_t generation) 1087 1949 { … … 1119 1981 substate->pnn_list = pnn_list; 1120 1982 substate->count = count; 1983 substate->caps = caps; 1984 substate->ban_credits = ban_credits; 1121 1985 substate->generation = generation; 1122 1986 substate->db_id = dbmap->dbs[i].db_id; … … 1125 1989 1126 1990 subreq = recover_db_send(state, ev, client, tun_list, 1127 pnn_list, count, generation,1128 substate->db_id,1991 pnn_list, count, caps, ban_credits, 1992 generation, substate->db_id, 1129 1993 
substate->persistent); 1130 1994 if (tevent_req_nomem(subreq, req)) { … … 1157 2021 1158 2022 substate->num_fails += 1; 1159 if (substate->num_fails < 5) {2023 if (substate->num_fails < NUM_RETRIES) { 1160 2024 subreq = recover_db_send(state, state->ev, substate->client, 1161 2025 substate->tun_list, 1162 2026 substate->pnn_list, substate->count, 2027 substate->caps, substate->ban_credits, 1163 2028 substate->generation, substate->db_id, 1164 2029 substate->persistent); … … 1207 2072 * Run the parallel database recovery 1208 2073 * 2074 * - Get tunables 1209 2075 * - Get nodemap 1210 2076 * - Get vnnmap 1211 2077 * - Get capabilities from all nodes 1212 * - Get tunables from all nodes1213 2078 * - Get dbmap 1214 2079 * - Set RECOVERY_ACTIVE … … 1229 2094 struct ctdb_node_map *nodemap; 1230 2095 uint32_t *caps; 2096 uint32_t *ban_credits; 1231 2097 struct ctdb_tunable_list *tun_list; 1232 2098 struct ctdb_vnn_map *vnnmap; … … 1234 2100 }; 1235 2101 2102 static void recovery_tunables_done(struct tevent_req *subreq); 1236 2103 static void recovery_nodemap_done(struct tevent_req *subreq); 1237 2104 static void recovery_vnnmap_done(struct tevent_req *subreq); 1238 2105 static void recovery_capabilities_done(struct tevent_req *subreq); 1239 static void recovery_tunables_done(struct tevent_req *subreq);1240 2106 static void recovery_dbmap_done(struct tevent_req *subreq); 1241 2107 static void recovery_active_done(struct tevent_req *subreq); … … 1243 2109 static void recovery_vnnmap_update_done(struct tevent_req *subreq); 1244 2110 static void recovery_db_recovery_done(struct tevent_req *subreq); 2111 static void recovery_failed_done(struct tevent_req *subreq); 1245 2112 static void recovery_normal_done(struct tevent_req *subreq); 1246 2113 static void recovery_end_recovery_done(struct tevent_req *subreq); … … 1265 2132 state->destnode = ctdb_client_pnn(client); 1266 2133 1267 ctdb_req_control_get_nodemap(&request); 1268 subreq = ctdb_client_control_send(mem_ctx, ev, 
client, state->destnode, 1269 TIMEOUT(), &request); 2134 ctdb_req_control_get_all_tunables(&request); 2135 subreq = ctdb_client_control_send(state, state->ev, state->client, 2136 state->destnode, TIMEOUT(), 2137 &request); 1270 2138 if (tevent_req_nomem(subreq, req)) { 1271 2139 return tevent_req_post(req, ev); 1272 2140 } 1273 tevent_req_set_callback(subreq, recovery_ nodemap_done, req);2141 tevent_req_set_callback(subreq, recovery_tunables_done, req); 1274 2142 1275 2143 return req; 1276 2144 } 1277 2145 1278 static void recovery_ nodemap_done(struct tevent_req *subreq)2146 static void recovery_tunables_done(struct tevent_req *subreq) 1279 2147 { 1280 2148 struct tevent_req *req = tevent_req_callback_data( … … 1284 2152 struct ctdb_reply_control *reply; 1285 2153 struct ctdb_req_control request; 2154 int ret; 2155 bool status; 2156 2157 status = ctdb_client_control_recv(subreq, &ret, state, &reply); 2158 TALLOC_FREE(subreq); 2159 if (! status) { 2160 LOG("control GET_ALL_TUNABLES failed, ret=%d\n", ret); 2161 tevent_req_error(req, ret); 2162 return; 2163 } 2164 2165 ret = ctdb_reply_control_get_all_tunables(reply, state, 2166 &state->tun_list); 2167 if (ret != 0) { 2168 LOG("control GET_ALL_TUNABLES failed, ret=%d\n", ret); 2169 tevent_req_error(req, EPROTO); 2170 return; 2171 } 2172 2173 talloc_free(reply); 2174 2175 recover_timeout = state->tun_list->recover_timeout; 2176 2177 ctdb_req_control_get_nodemap(&request); 2178 subreq = ctdb_client_control_send(state, state->ev, state->client, 2179 state->destnode, TIMEOUT(), 2180 &request); 2181 if (tevent_req_nomem(subreq, req)) { 2182 return; 2183 } 2184 tevent_req_set_callback(subreq, recovery_nodemap_done, req); 2185 } 2186 2187 static void recovery_nodemap_done(struct tevent_req *subreq) 2188 { 2189 struct tevent_req *req = tevent_req_callback_data( 2190 subreq, struct tevent_req); 2191 struct recovery_state *state = tevent_req_data( 2192 req, struct recovery_state); 2193 struct ctdb_reply_control *reply; 2194 
struct ctdb_req_control request; 1286 2195 bool status; 1287 2196 int ret; … … 1307 2216 if (state->count <= 0) { 1308 2217 tevent_req_error(req, ENOMEM); 2218 return; 2219 } 2220 2221 state->ban_credits = talloc_zero_array(state, uint32_t, 2222 state->nodemap->num); 2223 if (tevent_req_nomem(state->ban_credits, req)) { 1309 2224 return; 1310 2225 } … … 1408 2323 return; 1409 2324 } 1410 }1411 1412 talloc_free(reply);1413 1414 ctdb_req_control_get_all_tunables(&request);1415 subreq = ctdb_client_control_send(state, state->ev, state->client,1416 state->destnode, TIMEOUT(),1417 &request);1418 if (tevent_req_nomem(subreq, req)) {1419 return;1420 }1421 tevent_req_set_callback(subreq, recovery_tunables_done, req);1422 }1423 1424 static void recovery_tunables_done(struct tevent_req *subreq)1425 {1426 struct tevent_req *req = tevent_req_callback_data(1427 subreq, struct tevent_req);1428 struct recovery_state *state = tevent_req_data(1429 req, struct recovery_state);1430 struct ctdb_reply_control *reply;1431 struct ctdb_req_control request;1432 int ret;1433 bool status;1434 1435 status = ctdb_client_control_recv(subreq, &ret, state, &reply);1436 TALLOC_FREE(subreq);1437 if (! status) {1438 LOG("control GET_ALL_TUNABLES failed, ret=%d\n", ret);1439 tevent_req_error(req, ret);1440 return;1441 }1442 1443 ret = ctdb_reply_control_get_all_tunables(reply, state,1444 &state->tun_list);1445 if (ret != 0) {1446 LOG("control GET_ALL_TUNABLES failed, ret=%d\n", ret);1447 tevent_req_error(req, EPROTO);1448 return;1449 2325 } 1450 2326 … … 1675 2551 state->dbmap, state->tun_list, 1676 2552 state->pnn_list, state->count, 2553 state->caps, state->ban_credits, 1677 2554 state->vnnmap->generation); 1678 2555 if (tevent_req_nomem(subreq, req)) { … … 1695 2572 TALLOC_FREE(subreq); 1696 2573 1697 LOG("%d databases recovered\n", count); 1698 1699 if (! status) { 1700 tevent_req_error(req, EIO); 2574 LOG("%d of %d databases recovered\n", count, state->dbmap->num); 2575 2576 if (! 
status) { 2577 uint32_t max_pnn = CTDB_UNKNOWN_PNN, max_credits = 0; 2578 int i; 2579 2580 /* Bans are not enabled */ 2581 if (state->tun_list->enable_bans == 0) { 2582 tevent_req_error(req, EIO); 2583 return; 2584 } 2585 2586 for (i=0; i<state->count; i++) { 2587 uint32_t pnn; 2588 pnn = state->pnn_list[i]; 2589 if (state->ban_credits[pnn] > max_credits) { 2590 max_pnn = pnn; 2591 max_credits = state->ban_credits[pnn]; 2592 } 2593 } 2594 2595 /* If pulling database fails multiple times */ 2596 if (max_credits >= NUM_RETRIES) { 2597 struct ctdb_req_message message; 2598 2599 LOG("Assigning banning credits to node %u\n", max_pnn); 2600 2601 message.srvid = CTDB_SRVID_BANNING; 2602 message.data.pnn = max_pnn; 2603 2604 subreq = ctdb_client_message_send( 2605 state, state->ev, state->client, 2606 ctdb_client_pnn(state->client), 2607 &message); 2608 if (tevent_req_nomem(subreq, req)) { 2609 return; 2610 } 2611 tevent_req_set_callback(subreq, recovery_failed_done, 2612 req); 2613 } else { 2614 tevent_req_error(req, EIO); 2615 } 1701 2616 return; 1702 2617 } … … 1711 2626 } 1712 2627 tevent_req_set_callback(subreq, recovery_normal_done, req); 2628 } 2629 2630 static void recovery_failed_done(struct tevent_req *subreq) 2631 { 2632 struct tevent_req *req = tevent_req_callback_data( 2633 subreq, struct tevent_req); 2634 int ret; 2635 bool status; 2636 2637 status = ctdb_client_message_recv(subreq, &ret); 2638 TALLOC_FREE(subreq); 2639 if (! status) { 2640 LOG("failed to assign banning credits, ret=%d\n", ret); 2641 } 2642 2643 tevent_req_error(req, EIO); 1713 2644 } 1714 2645 … … 1795 2726 static void recovery_recv(struct tevent_req *req, int *perr) 1796 2727 { 1797 int err; 1798 1799 if (tevent_req_is_unix_error(req, &err)) { 1800 if (perr != NULL) { 1801 *perr = err; 1802 } 1803 return; 1804 } 2728 generic_recv(req, perr); 1805 2729 } 1806 2730 
- 
      vendor/current/ctdb/server/ctdb_takeover.cr988 r989 404 404 } 405 405 406 struct takeover_callback_state {407 struct ctdb_req_control_old *c;408 ctdb_sock_addr *addr;409 struct ctdb_vnn *vnn;410 };411 412 406 struct ctdb_do_takeip_state { 413 407 struct ctdb_req_control_old *c; … … 502 496 CTDB_NO_MEMORY(ctdb, state); 503 497 504 state->c = talloc_steal(ctdb, c);498 state->c = NULL; 505 499 state->vnn = vnn; 506 500 … … 531 525 } 532 526 527 state->c = talloc_steal(ctdb, c); 533 528 return 0; 534 529 } … … 639 634 CTDB_NO_MEMORY(ctdb, state); 640 635 641 state->c = talloc_steal(ctdb, c);636 state->c = NULL; 642 637 state->old = old; 643 638 state->vnn = vnn; … … 671 666 } 672 667 668 state->c = talloc_steal(ctdb, c); 673 669 return 0; 674 670 } … … 816 812 } 817 813 818 /*819 kill any clients that are registered with a IP that is being released820 */821 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)822 {823 struct ctdb_client_ip *ip;824 825 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",826 ctdb_addr_to_str(addr)));827 828 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {829 ctdb_sock_addr tmp_addr;830 831 tmp_addr = ip->addr;832 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",833 ip->client_id,834 ctdb_addr_to_str(&ip->addr)));835 836 if (ctdb_same_ip(&tmp_addr, addr)) {837 struct ctdb_client *client = reqid_find(ctdb->idr,838 ip->client_id,839 struct ctdb_client);840 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",841 ip->client_id,842 ctdb_addr_to_str(&ip->addr),843 client->pid));844 845 if (client->pid != 0) {846 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",847 (unsigned)client->pid,848 ctdb_addr_to_str(addr),849 ip->client_id));850 kill(client->pid, SIGKILL);851 }852 }853 }854 }855 856 814 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn) 857 815 { … … 862 820 } 863 821 822 static struct ctdb_vnn *release_ip_post(struct 
ctdb_context *ctdb, 823 struct ctdb_vnn *vnn, 824 ctdb_sock_addr *addr) 825 { 826 TDB_DATA data; 827 828 /* Send a message to all clients of this node telling them 829 * that the cluster has been reconfigured and they should 830 * close any connections on this IP address 831 */ 832 data.dptr = (uint8_t *)ctdb_addr_to_str(addr); 833 data.dsize = strlen((char *)data.dptr)+1; 834 DEBUG(DEBUG_INFO, ("Sending RELEASE_IP message for %s\n", data.dptr)); 835 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data); 836 837 ctdb_vnn_unassign_iface(ctdb, vnn); 838 839 /* Process the IP if it has been marked for deletion */ 840 if (vnn->delete_pending) { 841 do_delete_ip(ctdb, vnn); 842 return NULL; 843 } 844 845 return vnn; 846 } 847 848 struct release_ip_callback_state { 849 struct ctdb_req_control_old *c; 850 ctdb_sock_addr *addr; 851 struct ctdb_vnn *vnn; 852 uint32_t target_pnn; 853 }; 854 864 855 /* 865 856 called when releaseip event finishes 866 857 */ 867 static void release_ip_callback(struct ctdb_context *ctdb, int status, 858 static void release_ip_callback(struct ctdb_context *ctdb, int status, 868 859 void *private_data) 869 860 { 870 struct takeover_callback_state *state = 871 talloc_get_type(private_data, struct takeover_callback_state); 872 TDB_DATA data; 861 struct release_ip_callback_state *state = 862 talloc_get_type(private_data, struct release_ip_callback_state); 873 863 874 864 if (status == -ETIME) { … … 888 878 } 889 879 890 /* send a message to all clients of this node telling them 891 that the cluster has been reconfigured and they should 892 release any sockets on this IP */ 893 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr)); 894 CTDB_NO_MEMORY_VOID(ctdb, data.dptr); 895 data.dsize = strlen((char *)data.dptr)+1; 896 897 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr)); 898 899 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data); 900 901 /* kill clients that 
have registered with this IP */ 902 release_kill_clients(ctdb, state->addr); 903 904 ctdb_vnn_unassign_iface(ctdb, state->vnn); 905 906 /* Process the IP if it has been marked for deletion */ 907 if (state->vnn->delete_pending) { 908 do_delete_ip(ctdb, state->vnn); 909 state->vnn = NULL; 910 } 880 state->vnn->pnn = state->target_pnn; 881 state->vnn = release_ip_post(ctdb, state->vnn, state->addr); 911 882 912 883 /* the control succeeded */ … … 915 886 } 916 887 917 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)888 static int ctdb_releaseip_destructor(struct release_ip_callback_state *state) 918 889 { 919 890 if (state->vnn != NULL) { … … 932 903 { 933 904 int ret; 934 struct takeover_callback_state *state;905 struct release_ip_callback_state *state; 935 906 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr; 936 907 struct ctdb_vnn *vnn; … … 944 915 return 0; 945 916 } 946 vnn->pnn = pip->pnn;947 917 948 918 /* stop any previous arps */ … … 950 920 vnn->takeover_ctx = NULL; 951 921 952 /* Some ctdb tool commands (e.g. moveip, rebalanceip) send 953 * lazy multicast to drop an IP from any node that isn't the 954 * intended new node. The following causes makes ctdbd ignore 955 * a release for any address it doesn't host. 922 /* RELEASE_IP controls are sent to all nodes that should not 923 * be hosting a particular IP. This serves 2 purposes. The 924 * first is to help resolve any inconsistencies. If a node 925 * does unexpectly host an IP then it will be released. The 926 * 2nd is to use a "redundant release" to tell non-takeover 927 * nodes where an IP is moving to. This is how "ctdb ip" can 928 * report the (likely) location of an IP by only asking the 929 * local node. Redundant releases need to update the PNN but 930 * are otherwise ignored. 
956 931 */ 957 932 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) { … … 961 936 vnn->public_netmask_bits, 962 937 ctdb_vnn_iface_string(vnn))); 938 vnn->pnn = pip->pnn; 963 939 ctdb_vnn_unassign_iface(ctdb, vnn); 964 940 return 0; … … 969 945 ctdb_addr_to_str(&pip->addr), 970 946 vnn->public_netmask_bits)); 947 vnn->pnn = pip->pnn; 971 948 return 0; 972 949 } … … 994 971 pip->pnn)); 995 972 996 state = talloc(ctdb, struct takeover_callback_state);973 state = talloc(ctdb, struct release_ip_callback_state); 997 974 if (state == NULL) { 998 975 ctdb_set_error(ctdb, "Out of memory at %s:%d", … … 1002 979 } 1003 980 1004 state->c = talloc_steal(state, c);1005 state->addr = talloc(state, ctdb_sock_addr); 981 state->c = NULL; 982 state->addr = talloc(state, ctdb_sock_addr); 1006 983 if (state->addr == NULL) { 1007 984 ctdb_set_error(ctdb, "Out of memory at %s:%d", … … 1012 989 } 1013 990 *state->addr = pip->addr; 991 state->target_pnn = pip->pnn; 1014 992 state->vnn = vnn; 1015 993 … … 1035 1013 /* tell the control that we will be reply asynchronously */ 1036 1014 *async_reply = true; 1015 state->c = talloc_steal(state, c); 1037 1016 return 0; 1038 1017 } … … 1776 1755 bool can_host_ips; 1777 1756 1757 /* Default timeout for early jump to IPREALLOCATED. See below 1758 * for explanation of 3 times... */ 1759 timeout = timeval_current_ofs(3 * ctdb->tunable.takeover_timeout, 0); 1760 1778 1761 /* 1779 1762 * ip failover is completely disabled, just send out the … … 1852 1835 1853 1836 ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */ 1837 1838 /* Each of the following stages (RELEASE_IP, TAKEOVER_IP, 1839 * IPREALLOCATED) notionally has a timeout of TakeoverTimeout 1840 * seconds. However, RELEASE_IP can take longer due to TCP 1841 * connection killing, so sometimes needs more time. 1842 * Therefore, use a cumulative timeout of TakeoverTimeout * 3 1843 * seconds across all 3 stages. 
No explicit expiry checks are 1844 * needed before each stage because tevent is smart enough to 1845 * fire the timeouts even if they are in the past. Initialise 1846 * this here so it explicitly covers the stages we're 1847 * interested in but, in particular, not the time taken by the 1848 * ipalloc(). 1849 */ 1850 timeout = timeval_current_ofs(3 * ctdb->tunable.takeover_timeout, 0); 1854 1851 1855 1852 /* Send a RELEASE_IP to all nodes that should not be hosting … … 1875 1872 ip.addr = tmp_ip->addr; 1876 1873 1877 timeout = TAKEOVER_TIMEOUT();1878 1874 data.dsize = sizeof(ip); 1879 1875 data.dptr = (uint8_t *)&ip; … … 1918 1914 ip.addr = tmp_ip->addr; 1919 1915 1920 timeout = TAKEOVER_TIMEOUT();1921 1916 data.dsize = sizeof(ip); 1922 1917 data.dptr = (uint8_t *)&ip; … … 1956 1951 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true); 1957 1952 ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED, 1958 nodes, 0, TAKEOVER_TIMEOUT(),1953 nodes, 0, timeout, 1959 1954 false, tdb_null, 1960 1955 NULL, iprealloc_fail_callback, … … 2377 2372 void ctdb_release_all_ips(struct ctdb_context *ctdb) 2378 2373 { 2379 struct ctdb_vnn *vnn ;2374 struct ctdb_vnn *vnn, *next; 2380 2375 int count = 0; 2381 2376 … … 2384 2379 } 2385 2380 2386 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) { 2381 for (vnn = ctdb->vnn; vnn != NULL; vnn = next) { 2382 /* vnn can be freed below in release_ip_post() */ 2383 next = vnn->next; 2384 2387 2385 if (!ctdb_sys_have_ip(&vnn->public_address)) { 2388 2386 ctdb_vnn_unassign_iface(ctdb, vnn); 2389 continue;2390 }2391 if (!vnn->iface) {2392 2387 continue; 2393 2388 } … … 2413 2408 2414 2409 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u", 2415 ctdb_vnn_iface_string(vnn), 2416 ctdb_addr_to_str(&vnn->public_address), 2417 vnn->public_netmask_bits); 2418 release_kill_clients(ctdb, &vnn->public_address); 2419 ctdb_vnn_unassign_iface(ctdb, vnn); 2420 vnn->update_in_flight = false; 2410 ctdb_vnn_iface_string(vnn), 2411 
ctdb_addr_to_str(&vnn->public_address), 2412 vnn->public_netmask_bits); 2413 /* releaseip timeouts are converted to success, so to 2414 * detect failures just check if the IP address is 2415 * still there... 2416 */ 2417 if (ctdb_sys_have_ip(&vnn->public_address)) { 2418 DEBUG(DEBUG_ERR, 2419 (__location__ 2420 " IP address %s not released\n", 2421 ctdb_addr_to_str(&vnn->public_address))); 2422 vnn->update_in_flight = false; 2423 continue; 2424 } 2425 2426 vnn = release_ip_post(ctdb, vnn, &vnn->public_address); 2427 if (vnn != NULL) { 2428 vnn->update_in_flight = false; 2429 } 2421 2430 count++; 2422 2431 } 
- 
      vendor/current/ctdb/server/ctdb_tunables.c r988 r989 42 42 { "KeepaliveInterval", 5, offsetof(struct ctdb_tunable_list, keepalive_interval), false }, 43 43 { "KeepaliveLimit", 5, offsetof(struct ctdb_tunable_list, keepalive_limit), false }, 44 { "RecoverTimeout", 120, offsetof(struct ctdb_tunable_list, recover_timeout), false }, 44 { "RecoverTimeout", 30, offsetof(struct ctdb_tunable_list, recover_timeout), false }, 45 45 { "RecoverInterval", 1, offsetof(struct ctdb_tunable_list, recover_interval), false }, 46 46 { "ElectionTimeout", 3, offsetof(struct ctdb_tunable_list, election_timeout), false }, … … 94 94 { "TDBMutexEnabled", 0, offsetof(struct ctdb_tunable_list, mutex_enabled), false }, 95 95 { "LockProcessesPerDB", 200, offsetof(struct ctdb_tunable_list, lock_processes_per_db), false }, 96 { "RecBufferSizeLimit", 1000000, offsetof(struct ctdb_tunable_list, rec_buffer_size_limit), false }, 96 97 }; 97 98 
- 
      vendor/current/ctdb/server/ctdbd.c r988 r989 179 179 180 180 ev = tevent_context_init(NULL); 181 if (ev == NULL) { 182 DEBUG(DEBUG_ALERT,("tevent_context_init() failed\n")); 183 exit(1); 184 } 181 185 tevent_loop_allow_nesting(ev); 182 186 
- 
      vendor/current/ctdb/server/eventscript.cr988 r989 697 697 { 698 698 DLIST_REMOVE(callback->ctdb->script_callbacks, callback); 699 return 0; 700 } 701 702 struct schedule_callback_state { 703 struct ctdb_context *ctdb; 704 void (*callback)(struct ctdb_context *, int, void *); 705 void *private_data; 706 int status; 707 struct tevent_immediate *im; 708 }; 709 710 static void schedule_callback_handler(struct tevent_context *ctx, 711 struct tevent_immediate *im, 712 void *private_data) 713 { 714 struct schedule_callback_state *state = 715 talloc_get_type_abort(private_data, 716 struct schedule_callback_state); 717 718 if (state->callback != NULL) { 719 state->callback(state->ctdb, state->status, 720 state->private_data); 721 } 722 talloc_free(state); 723 } 724 725 static int 726 schedule_callback_immediate(struct ctdb_context *ctdb, 727 void (*callback)(struct ctdb_context *, 728 int, void *), 729 void *private_data, 730 int status) 731 { 732 struct schedule_callback_state *state; 733 struct tevent_immediate *im; 734 735 state = talloc_zero(ctdb, struct schedule_callback_state); 736 if (state == NULL) { 737 DEBUG(DEBUG_ERR, (__location__ " out of memory\n")); 738 return -1; 739 } 740 im = tevent_create_immediate(state); 741 if (im == NULL) { 742 DEBUG(DEBUG_ERR, (__location__ " out of memory\n")); 743 talloc_free(state); 744 return -1; 745 } 746 747 state->ctdb = ctdb; 748 state->callback = callback; 749 state->private_data = private_data; 750 state->status = status; 751 state->im = im; 752 753 tevent_schedule_immediate(im, ctdb->ev, 754 schedule_callback_handler, state); 699 755 return 0; 700 756 } … … 808 864 state->child = 0; 809 865 866 /* Nothing to do? 
*/ 867 if (state->scripts->num_scripts == 0) { 868 int ret = schedule_callback_immediate(ctdb, callback, 869 private_data, 0); 870 talloc_free(state); 871 if (ret != 0) { 872 DEBUG(DEBUG_ERR, 873 ("Unable to schedule callback for 0 scripts\n")); 874 return 1; 875 } 876 return 0; 877 } 878 879 state->scripts->scripts[0].status = fork_child_for_script(ctdb, state); 880 if (state->scripts->scripts[0].status != 0) { 881 talloc_free(state); 882 return -1; 883 } 884 810 885 if (call == CTDB_EVENT_MONITOR) { 811 886 ctdb->current_monitor = state; 812 887 } 813 888 889 ctdb->active_events++; 890 814 891 talloc_set_destructor(state, event_script_destructor); 815 816 ctdb->active_events++;817 818 /* Nothing to do? */819 if (state->scripts->num_scripts == 0) {820 callback(ctdb, 0, private_data);821 talloc_free(state);822 return 0;823 }824 825 state->scripts->scripts[0].status = fork_child_for_script(ctdb, state);826 if (state->scripts->scripts[0].status != 0) {827 /* Callback is called from destructor, with fail result. */828 talloc_free(state);829 return 0;830 }831 892 832 893 if (!timeval_is_zero(&state->timeout)) { … … 1008 1069 CTDB_NO_MEMORY(ctdb, state); 1009 1070 1010 state->c = talloc_steal(state, c);1071 state->c = NULL; 1011 1072 1012 1073 DEBUG(DEBUG_NOTICE,("Running eventscripts with arguments %s\n", indata.dptr)); … … 1024 1085 /* tell ctdb_control.c that we will be replying asynchronously */ 1025 1086 *async_reply = true; 1026 1087 state->c = talloc_steal(state, c); 1027 1088 return 0; 1028 1089 } 
- 
      vendor/current/ctdb/tests/src/ctdbd_test.c r988 r989 48 48 #include "common/reqid.c" 49 49 #include "common/logging.c" 50 #include "common/pidfile.c" 50 51 51 52 /* CTDB_SERVER_OBJ */ 
- 
      vendor/current/ctdb/tests/src/protocol_client_test.cr988 r989 657 657 cd->data.db_id = rand32(); 658 658 break; 659 660 case CTDB_CONTROL_DB_PULL: 661 cd->data.pulldb_ext = talloc(mem_ctx, struct ctdb_pulldb_ext); 662 assert(cd->data.pulldb_ext != NULL); 663 fill_ctdb_pulldb_ext(mem_ctx, cd->data.pulldb_ext); 664 break; 665 666 case CTDB_CONTROL_DB_PUSH_START: 667 cd->data.pulldb_ext = talloc(mem_ctx, struct ctdb_pulldb_ext); 668 assert(cd->data.pulldb_ext != NULL); 669 fill_ctdb_pulldb_ext(mem_ctx, cd->data.pulldb_ext); 670 break; 671 672 case CTDB_CONTROL_DB_PUSH_CONFIRM: 673 cd->data.db_id = rand32(); 674 break; 675 659 676 } 660 677 } … … 1104 1121 assert(cd->data.db_id == cd2->data.db_id); 1105 1122 break; 1123 1124 case CTDB_CONTROL_DB_PULL: 1125 verify_ctdb_pulldb_ext(cd->data.pulldb_ext, 1126 cd2->data.pulldb_ext); 1127 break; 1128 1129 case CTDB_CONTROL_DB_PUSH_START: 1130 verify_ctdb_pulldb_ext(cd->data.pulldb_ext, 1131 cd2->data.pulldb_ext); 1132 break; 1133 1134 case CTDB_CONTROL_DB_PUSH_CONFIRM: 1135 assert(cd->data.db_id == cd2->data.db_id); 1136 break; 1137 1106 1138 } 1107 1139 } … … 1560 1592 break; 1561 1593 1594 case CTDB_CONTROL_DB_PULL: 1595 cd->data.num_records = rand32(); 1596 break; 1597 1598 case CTDB_CONTROL_DB_PUSH_CONFIRM: 1599 cd->data.num_records = rand32(); 1600 break; 1601 1562 1602 } 1563 1603 } … … 1941 1981 case CTDB_CONTROL_GET_NODES_FILE: 1942 1982 verify_ctdb_node_map(cd->data.nodemap, cd2->data.nodemap); 1983 break; 1984 1985 case CTDB_CONTROL_DB_PULL: 1986 assert(cd->data.num_records == cd2->data.num_records); 1987 break; 1988 1989 case CTDB_CONTROL_DB_PUSH_CONFIRM: 1990 assert(cd->data.num_records == cd2->data.num_records); 1943 1991 break; 1944 1992 
- 
      vendor/current/ctdb/tests/src/protocol_types_test.cr988 r989 20 20 #include "replace.h" 21 21 #include "system/network.h" 22 #include "system/filesys.h" 22 23 23 24 #include <assert.h> … … 181 182 assert(p1->db_id == p2->db_id); 182 183 assert(p1->lmaster == p2->lmaster); 184 } 185 186 static void fill_ctdb_pulldb_ext(TALLOC_CTX *mem_ctx, 187 struct ctdb_pulldb_ext *p) 188 { 189 p->db_id = rand32(); 190 p->lmaster = rand32(); 191 p->srvid = rand64(); 192 } 193 194 static void verify_ctdb_pulldb_ext(struct ctdb_pulldb_ext *p1, 195 struct ctdb_pulldb_ext *p2) 196 { 197 assert(p1->db_id == p2->db_id); 198 assert(p1->lmaster == p2->lmaster); 199 assert(p1->srvid == p2->srvid); 183 200 } 184 201 … … 1178 1195 DEFINE_TEST(struct ctdb_dbid_map, ctdb_dbid_map); 1179 1196 DEFINE_TEST(struct ctdb_pulldb, ctdb_pulldb); 1197 DEFINE_TEST(struct ctdb_pulldb_ext, ctdb_pulldb_ext); 1180 1198 DEFINE_TEST(struct ctdb_rec_data, ctdb_rec_data); 1181 1199 DEFINE_TEST(struct ctdb_rec_buffer, ctdb_rec_buffer); … … 1219 1237 DEFINE_TEST(struct ctdb_g_lock_list, ctdb_g_lock_list); 1220 1238 1239 static void test_ctdb_rec_buffer_read_write(void) 1240 { 1241 TALLOC_CTX *mem_ctx = talloc_new(NULL); 1242 struct ctdb_rec_buffer *p1, **p2; 1243 const char *filename = "ctdb_rec_buffer_test.dat"; 1244 int count = 100; 1245 int fd, i, ret; 1246 off_t offset; 1247 1248 p1 = talloc_array(mem_ctx, struct ctdb_rec_buffer, count); 1249 assert(p1 != NULL); 1250 for (i=0; i<count; i++) { 1251 fill_ctdb_rec_buffer(mem_ctx, &p1[i]); 1252 } 1253 1254 fd = open(filename, O_RDWR|O_CREAT, 0600); 1255 assert(fd != -1); 1256 unlink(filename); 1257 1258 for (i=0; i<count; i++) { 1259 ret = ctdb_rec_buffer_write(&p1[i], fd); 1260 assert(ret == 0); 1261 } 1262 1263 offset = lseek(fd, 0, SEEK_CUR); 1264 assert(offset != -1); 1265 offset = lseek(fd, -offset, SEEK_CUR); 1266 assert(offset == 0); 1267 1268 p2 = talloc_array(mem_ctx, struct ctdb_rec_buffer *, count); 1269 assert(p2 != NULL); 1270 1271 for (i=0; 
i<count; i++) { 1272 ret = ctdb_rec_buffer_read(fd, mem_ctx, &p2[i]); 1273 assert(ret == 0); 1274 } 1275 1276 close(fd); 1277 1278 for (i=0; i<count; i++) { 1279 verify_ctdb_rec_buffer(&p1[i], p2[i]); 1280 } 1281 1282 talloc_free(mem_ctx); 1283 } 1284 1221 1285 int main(int argc, char *argv[]) 1222 1286 { … … 1241 1305 TEST_FUNC(ctdb_dbid_map)(); 1242 1306 TEST_FUNC(ctdb_pulldb)(); 1307 TEST_FUNC(ctdb_pulldb_ext)(); 1243 1308 TEST_FUNC(ctdb_rec_data)(); 1244 1309 TEST_FUNC(ctdb_rec_buffer)(); … … 1282 1347 TEST_FUNC(ctdb_g_lock_list)(); 1283 1348 1349 test_ctdb_rec_buffer_read_write(); 1350 1284 1351 return 0; 1285 1352 } 
- 
      vendor/current/ctdb/utils/ping_pong/ping_pong.c r988 r989 143 143 if (val == NULL) { 144 144 printf("calloc failed\n"); 145 munmap(p, num_locks+1); 145 if (use_mmap) { 146 munmap(p, num_locks+1); 147 } 146 148 return; 147 149 } 
- 
      vendor/current/ctdb/wscript r988 r989 209 209 conf.env.CTDB_TEST_DATADIR = os.path.join(conf.env.EXEC_PREFIX, 210 210 'share/ctdb-tests') 211 conf.env.CTDB_TEST_LIBDIR = os.path.join(conf.env.LIBDIR, 'ctdb-tests') 211 conf.env.CTDB_TEST_LIBEXECDIR = os.path.join(conf.env.LIBEXECDIR, 'ctdb/tests') 212 212 213 213 # Allow unified compilation and separate compilation of utilities … … 344 344 '''db_hash.c srvid.c reqid.c 345 345 pkt_read.c pkt_write.c comm.c 346 logging.c '''), 347 deps='replace talloc tevent tdb tevent-unix-util') 346 logging.c pidfile.c'''), 347 deps='replace talloc tevent tdb tevent-util') 348 348 349 349 bld.SAMBA_SUBSYSTEM('ctdb-protocol', … … 630 630 'protocol_types_test', 631 631 'protocol_client_test', 632 'pidfile_test', 632 633 ] 633 634 … … 637 638 bld.SAMBA_BINARY(target, 638 639 source=src, 639 deps='talloc tevent tdb tevent-unix-util', 640 install_path='${CTDB_TEST_LIBDIR}') 640 deps='talloc tevent tdb tevent-util', 641 install_path='${CTDB_TEST_LIBEXECDIR}') 641 642 642 643 bld.SAMBA_BINARY('reqid_test', 643 644 source='tests/src/reqid_test.c', 644 645 deps='samba-util', 645 install_path='${CTDB_TEST_LIBDIR}') 646 install_path='${CTDB_TEST_LIBEXECDIR}') 646 647 647 648 # Test binaries … … 672 673 includes='include', 673 674 deps='ctdb-client ctdb-common ctdb-util', 674 install_path='${CTDB_TEST_LIBDIR}') 675 install_path='${CTDB_TEST_LIBEXECDIR}') 675 676 676 677 bld.SAMBA_BINARY('ctdb_takeover_tests', … … 681 682 ib_deps, 682 683 includes='include', 683 install_path='${CTDB_TEST_LIBDIR}') 684 install_path='${CTDB_TEST_LIBEXECDIR}') 684 685 685 686 bld.SAMBA_BINARY('ctdb_functest', … … 688 689 samba-util tdb-wrap''', 689 690 includes='include', 690 install_path='${CTDB_TEST_LIBDIR}') 691 install_path='${CTDB_TEST_LIBEXECDIR}') 691 692 692 693 bld.SAMBA_BINARY('ctdb_stubtest', … … 695 696 samba-util tdb-wrap''', 696 697 includes='include', 697 install_path='${CTDB_TEST_LIBDIR}') 698 install_path='${CTDB_TEST_LIBEXECDIR}') 698 699 699 
700 if bld.env.HAVE_INFINIBAND: … … 703 704 deps='replace talloc ctdb-client ctdb-common' + 704 705 ib_deps, 705 install_path='${CTDB_TEST_LIB DIR}')706 install_path='${CTDB_TEST_LIBEXECDIR}') 706 707 707 708 test_subdirs = [ … … 735 736 736 737 sed_expr = 's@^TEST_SCRIPTS_DIR=.*@&\\nexport TEST_BIN_DIR=\"%s\"@' % ( 737 bld.env.CTDB_TEST_LIB DIR)738 bld.env.CTDB_TEST_LIBEXECDIR) 738 739 bld.SAMBA_GENERATOR('ctdb-test-wrap', 739 740 source='tests/scripts/test_wrap', … … 744 745 745 746 sed_expr1 = 's@^test_dir=.*@test_dir=%s\\nexport TEST_BIN_DIR=\"%s\"@' % ( 746 bld.env.CTDB_TEST_DATADIR, bld.env.CTDB_TEST_LIB DIR)747 bld.env.CTDB_TEST_DATADIR, bld.env.CTDB_TEST_LIBEXECDIR) 747 748 sed_expr2 = 's@^\(export CTDB_TESTS_ARE_INSTALLED\)=false@\\1=true@' 748 749 bld.SAMBA_GENERATOR('ctdb-test-runner', 
  Note: See TracChangeset for help on using the changeset viewer.
  
