Changeset 989 for vendor/current/ctdb


Timestamp: Nov 25, 2016, 8:04:54 PM
Author: Silvan Scherrer
Message: Samba Server: update vendor to version 4.4.7

Location: vendor/current/ctdb
Files: 5 added, 35 edited

  • vendor/current/ctdb/client/client.h

    r988 r989  
    7373bool ctdb_client_message_recv(struct tevent_req *req, int *perr);
    7474
     75struct tevent_req *ctdb_client_message_multi_send(
     76                                TALLOC_CTX *mem_ctx,
     77                                struct tevent_context *ev,
     78                                struct ctdb_client_context *client,
     79                                uint32_t *pnn_list, int count,
     80                                struct ctdb_req_message *message);
     81
     82bool ctdb_client_message_multi_recv(struct tevent_req *req, int *perr,
     83                                    TALLOC_CTX *mem_ctx, int **perr_list);
     84
    7585int ctdb_client_message(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
    7686                        struct ctdb_client_context *client,
    7787                        uint32_t destnode, struct ctdb_req_message *message);
     88
     89struct tevent_req *ctdb_client_set_message_handler_send(
     90                                        TALLOC_CTX *mem_ctx,
     91                                        struct tevent_context *ev,
     92                                        struct ctdb_client_context *client,
     93                                        uint64_t srvid,
     94                                        srvid_handler_fn handler,
     95                                        void *private_data);
     96bool ctdb_client_set_message_handler_recv(struct tevent_req *req, int *perr);
     97
     98struct tevent_req *ctdb_client_remove_message_handler_send(
     99                                        TALLOC_CTX *mem_ctx,
     100                                        struct tevent_context *ev,
     101                                        struct ctdb_client_context *client,
     102                                        uint64_t srvid,
     103                                        void *private_data);
     104bool ctdb_client_remove_message_handler_recv(struct tevent_req *req,
     105                                             int *perr);
    78106
    79107int ctdb_client_set_message_handler(TALLOC_CTX *mem_ctx,
     
    710738                                    uint32_t db_id);
    711739
     740int ctdb_ctrl_db_pull(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
     741                      struct ctdb_client_context *client,
     742                      int destnode, struct timeval timeout,
     743                      struct ctdb_pulldb_ext *pulldb, uint32_t *num_records);
     744
     745int ctdb_ctrl_db_push_start(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
     746                            struct ctdb_client_context *client,
     747                            int destnode, struct timeval timeout,
     748                            struct ctdb_pulldb_ext *pulldb);
     749
     750int ctdb_ctrl_db_push_confirm(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
     751                              struct ctdb_client_context *client,
     752                              int destnode, struct timeval timeout,
     753                              uint32_t db_id, uint32_t *num_records);
     754
    712755/* from client/client_db.c */
    713756
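
The header now exports a one-to-many variant of the message API. A minimal usage sketch in the usual tevent send/recv style (hedged: everything other than the ctdb_client_message_multi_* calls — the caller, callback, and error handling — is hypothetical, not part of this changeset):

    static void broadcast_done(struct tevent_req *req)
    {
            int err;
            int *err_list = NULL;
            bool status;

            status = ctdb_client_message_multi_recv(req, &err, NULL,
                                                    &err_list);
            TALLOC_FREE(req);
            if (! status) {
                    /* err is the first failure seen; err_list has one
                     * slot per node, in pnn_list order */
            }
            talloc_free(err_list);  /* was stolen onto the NULL context */
    }

    static void broadcast(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
                          struct ctdb_client_context *client,
                          uint32_t *pnn_list, int count,
                          struct ctdb_req_message *message)
    {
            struct tevent_req *req;

            req = ctdb_client_message_multi_send(mem_ctx, ev, client,
                                                 pnn_list, count, message);
            if (req == NULL) {
                    return;  /* NULL pnn_list, zero count or out of memory */
            }
            tevent_req_set_callback(req, broadcast_done, NULL);
    }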
  • vendor/current/ctdb/client/client_control_sync.c

    r988 r989  
    31183118        return 0;
    31193119}
     3120
     3121int ctdb_ctrl_db_pull(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
     3122                      struct ctdb_client_context *client,
     3123                      int destnode, struct timeval timeout,
     3124                      struct ctdb_pulldb_ext *pulldb, uint32_t *num_records)
     3125{
     3126        struct ctdb_req_control request;
     3127        struct ctdb_reply_control *reply;
     3128        int ret;
     3129
     3130        ctdb_req_control_db_pull(&request, pulldb);
     3131        ret = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
     3132                                  &request, &reply);
     3133        if (ret != 0) {
     3134                DEBUG(DEBUG_ERR,
     3135                      ("Control DB_PULL failed to node %u, ret=%d\n",
     3136                       destnode, ret));
     3137                return ret;
     3138        }
     3139
     3140        ret = ctdb_reply_control_db_pull(reply, num_records);
     3141        if (ret != 0) {
     3142                DEBUG(DEBUG_ERR, ("Control DB_PULL failed, ret=%d\n", ret));
     3143                return ret;
     3144        }
     3145
     3146        return 0;
     3147}
     3148
     3149int ctdb_ctrl_db_push_start(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
     3150                            struct ctdb_client_context *client,
     3151                            int destnode, struct timeval timeout,
     3152                            struct ctdb_pulldb_ext *pulldb)
     3153{
     3154        struct ctdb_req_control request;
     3155        struct ctdb_reply_control *reply;
     3156        int ret;
     3157
     3158        ctdb_req_control_db_push_start(&request, pulldb);
     3159        ret = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
     3160                                  &request, &reply);
     3161        if (ret != 0) {
     3162                DEBUG(DEBUG_ERR,
     3163                      ("Control DB_PUSH failed to node %u, ret=%d\n",
     3164                       destnode, ret));
     3165                return ret;
     3166        }
     3167
     3168        ret = ctdb_reply_control_db_push_start(reply);
     3169        if (ret != 0) {
     3170                DEBUG(DEBUG_ERR,
     3171                      ("Control DB_PUSH failed, ret=%d\n", ret));
     3172                return ret;
     3173        }
     3174
     3175        return 0;
     3176}
     3177
     3178int ctdb_ctrl_db_push_confirm(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
     3179                              struct ctdb_client_context *client,
     3180                              int destnode, struct timeval timeout,
     3181                              uint32_t db_id, uint32_t *num_records)
     3182{
     3183        struct ctdb_req_control request;
     3184        struct ctdb_reply_control *reply;
     3185        int ret;
     3186
     3187        ctdb_req_control_db_push_confirm(&request, db_id);
     3188        ret = ctdb_client_control(mem_ctx, ev, client, destnode, timeout,
     3189                                  &request, &reply);
     3190        if (ret != 0) {
     3191                DEBUG(DEBUG_ERR,
     3192                      ("Control DB_PUSH failed to node %u, ret=%d\n",
     3193                       destnode, ret));
     3194                return ret;
     3195        }
     3196
     3197        ret = ctdb_reply_control_db_push_confirm(reply, num_records);
     3198        if (ret != 0) {
     3199                DEBUG(DEBUG_ERR,
     3200                      ("Control DB_PUSH failed, ret=%d\n", ret));
     3201                return ret;
     3202        }
     3203
     3204        return 0;
     3205}
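
Taken together, the three new synchronous wrappers drive the DB_PUSH_START, DB_PULL and DB_PUSH_CONFIRM controls. A sketch of the call order (assumed: the surrounding setup of client, pulldb and db_id is not shown, and the real recovery helper in ctdb does considerably more than this):

    /* Push records pulled from src_node to dst_node, then verify that the
     * confirmed record count matches what was pulled.  Illustrative only. */
    static int transfer_db(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
                           struct ctdb_client_context *client,
                           struct ctdb_pulldb_ext *pulldb, uint32_t db_id,
                           int src_node, int dst_node, struct timeval timeout)
    {
            uint32_t pulled = 0, pushed = 0;
            int ret;

            /* 1. tell the destination node to expect pushed records */
            ret = ctdb_ctrl_db_push_start(mem_ctx, ev, client, dst_node,
                                          timeout, pulldb);
            if (ret != 0) {
                    return ret;
            }

            /* 2. ask the source node to send its records */
            ret = ctdb_ctrl_db_pull(mem_ctx, ev, client, src_node, timeout,
                                    pulldb, &pulled);
            if (ret != 0) {
                    return ret;
            }

            /* 3. confirm the push and cross-check the record counts */
            ret = ctdb_ctrl_db_push_confirm(mem_ctx, ev, client, dst_node,
                                            timeout, db_id, &pushed);
            if (ret != 0) {
                    return ret;
            }
            return (pulled == pushed) ? 0 : -1;  /* -1: count mismatch */
    }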
  • vendor/current/ctdb/client/client_message.c

    r988 r989  
    158158
    159159/*
     160 * Handle multiple nodes
     161 */
     162
     163struct ctdb_client_message_multi_state {
     164        uint32_t *pnn_list;
     165        int count;
     166        int done;
     167        int err;
     168        int *err_list;
     169};
     170
     171struct message_index_state {
     172        struct tevent_req *req;
     173        int index;
     174};
     175
     176static void ctdb_client_message_multi_done(struct tevent_req *subreq);
     177
     178struct tevent_req *ctdb_client_message_multi_send(
     179                                TALLOC_CTX *mem_ctx,
     180                                struct tevent_context *ev,
     181                                struct ctdb_client_context *client,
     182                                uint32_t *pnn_list, int count,
     183                                struct ctdb_req_message *message)
     184{
     185        struct tevent_req *req, *subreq;
     186        struct ctdb_client_message_multi_state *state;
     187        int i;
     188
     189        if (pnn_list == NULL || count == 0) {
     190                return NULL;
     191        }
     192
     193        req = tevent_req_create(mem_ctx, &state,
     194                                struct ctdb_client_message_multi_state);
     195        if (req == NULL) {
     196                return NULL;
     197        }
     198
     199        state->pnn_list = pnn_list;
     200        state->count = count;
     201        state->done = 0;
     202        state->err = 0;
     203        state->err_list = talloc_zero_array(state, int, count);
     204        if (tevent_req_nomem(state->err_list, req)) {
     205                return tevent_req_post(req, ev);
     206        }
     207
     208        for (i=0; i<count; i++) {
     209                struct message_index_state *substate;
     210
     211                subreq = ctdb_client_message_send(state, ev, client,
     212                                                  pnn_list[i], message);
     213                if (tevent_req_nomem(subreq, req)) {
     214                        return tevent_req_post(req, ev);
     215                }
     216
     217                substate = talloc(subreq, struct message_index_state);
     218                if (tevent_req_nomem(substate, req)) {
     219                        return tevent_req_post(req, ev);
     220                }
     221
     222                substate->req = req;
     223                substate->index = i;
     224
     225                tevent_req_set_callback(subreq, ctdb_client_message_multi_done,
     226                                        substate);
     227        }
     228
     229        return req;
     230}
     231
     232static void ctdb_client_message_multi_done(struct tevent_req *subreq)
     233{
     234        struct message_index_state *substate = tevent_req_callback_data(
     235                subreq, struct message_index_state);
     236        struct tevent_req *req = substate->req;
     237        int idx = substate->index;
     238        struct ctdb_client_message_multi_state *state = tevent_req_data(
     239                req, struct ctdb_client_message_multi_state);
     240        bool status;
     241        int ret;
     242
     243        status = ctdb_client_message_recv(subreq, &ret);
     244        TALLOC_FREE(subreq);
     245        if (! status) {
     246                if (state->err == 0) {
     247                        state->err = ret;
     248                        state->err_list[idx] = state->err;
     249                }
     250        }
     251
     252        state->done += 1;
     253
     254        if (state->done == state->count) {
     255                tevent_req_done(req);
     256        }
     257}
     258
     259bool ctdb_client_message_multi_recv(struct tevent_req *req, int *perr,
     260                                    TALLOC_CTX *mem_ctx, int **perr_list)
     261{
     262        struct ctdb_client_message_multi_state *state = tevent_req_data(
     263                req, struct ctdb_client_message_multi_state);
     264        int err;
     265
     266        if (tevent_req_is_unix_error(req, &err)) {
     267                if (perr != NULL) {
     268                        *perr = err;
     269                }
     270                if (perr_list != NULL) {
     271                        *perr_list = talloc_steal(mem_ctx, state->err_list);
     272                }
     273                return false;
     274        }
     275
     276        if (perr != NULL) {
     277                *perr = state->err;
     278        }
     279
     280        if (perr_list != NULL) {
     281                *perr_list = talloc_steal(mem_ctx, state->err_list);
     282        }
     283
     284        if (state->err != 0) {
     285                return false;
     286        }
     287
     288        return true;
     289}
     290
     291/*
    160292 * sync version of message send
    161293 */
     
    191323        talloc_free(tmp_ctx);
    192324        return 0;
     325}
     326
     327struct ctdb_client_set_message_handler_state {
     328        struct ctdb_client_context *client;
     329        uint64_t srvid;
     330        srvid_handler_fn handler;
     331        void *private_data;
     332};
     333
     334static void ctdb_client_set_message_handler_done(struct tevent_req *subreq);
     335
     336struct tevent_req *ctdb_client_set_message_handler_send(
     337                                        TALLOC_CTX *mem_ctx,
     338                                        struct tevent_context *ev,
     339                                        struct ctdb_client_context *client,
     340                                        uint64_t srvid,
     341                                        srvid_handler_fn handler,
     342                                        void *private_data)
     343{
     344        struct tevent_req *req, *subreq;
     345        struct ctdb_client_set_message_handler_state *state;
     346        struct ctdb_req_control request;
     347
     348        req = tevent_req_create(mem_ctx, &state,
     349                                struct ctdb_client_set_message_handler_state);
     350        if (req == NULL) {
     351                return NULL;
     352        }
     353
     354        state->client = client;
     355        state->srvid = srvid;
     356        state->handler = handler;
     357        state->private_data = private_data;
     358
     359        ctdb_req_control_register_srvid(&request, srvid);
     360        subreq = ctdb_client_control_send(state, ev, client, client->pnn,
     361                                          tevent_timeval_zero(), &request);
     362        if (tevent_req_nomem(subreq, req)) {
     363                return tevent_req_post(req, ev);
     364        }
     365        tevent_req_set_callback(subreq, ctdb_client_set_message_handler_done,
     366                                req);
     367
     368        return req;
     369}
     370
     371static void ctdb_client_set_message_handler_done(struct tevent_req *subreq)
     372{
     373        struct tevent_req *req = tevent_req_callback_data(
     374                subreq, struct tevent_req);
     375        struct ctdb_client_set_message_handler_state *state = tevent_req_data(
     376                req, struct ctdb_client_set_message_handler_state);
     377        struct ctdb_reply_control *reply;
     378        bool status;
     379        int ret;
     380
     381        status = ctdb_client_control_recv(subreq, &ret, state, &reply);
     382        TALLOC_FREE(subreq);
     383        if (! status) {
     384                tevent_req_error(req, ret);
     385                return;
     386        }
     387
     388        ret = ctdb_reply_control_register_srvid(reply);
     389        talloc_free(reply);
     390        if (ret != 0) {
     391                tevent_req_error(req, ret);
     392                return;
     393        }
     394
     395        ret = srvid_register(state->client->srv, state->client, state->srvid,
     396                             state->handler, state->private_data);
     397        if (ret != 0) {
     398                tevent_req_error(req, ret);
     399                return;
     400        }
     401
     402        tevent_req_done(req);
     403}
     404
     405bool ctdb_client_set_message_handler_recv(struct tevent_req *req, int *perr)
     406{
     407        int err;
     408
     409        if (tevent_req_is_unix_error(req, &err)) {
     410                if (perr != NULL) {
     411                        *perr = err;
     412                }
     413                return false;
     414        }
     415        return true;
     416}
     417
     418struct ctdb_client_remove_message_handler_state {
     419        struct ctdb_client_context *client;
     420        uint64_t srvid;
     421        void *private_data;
     422};
     423
     424static void ctdb_client_remove_message_handler_done(struct tevent_req *subreq);
     425
     426struct tevent_req *ctdb_client_remove_message_handler_send(
     427                                        TALLOC_CTX *mem_ctx,
     428                                        struct tevent_context *ev,
     429                                        struct ctdb_client_context *client,
     430                                        uint64_t srvid,
     431                                        void *private_data)
     432{
     433        struct tevent_req *req, *subreq;
     434        struct ctdb_client_remove_message_handler_state *state;
     435        struct ctdb_req_control request;
     436
     437        req = tevent_req_create(mem_ctx, &state,
     438                                struct ctdb_client_remove_message_handler_state);
     439        if (req == NULL) {
     440                return NULL;
     441        }
     442
     443        state->client = client;
     444        state->srvid = srvid;
     445        state->private_data = private_data;
     446
     447        ctdb_req_control_deregister_srvid(&request, srvid);
     448        subreq = ctdb_client_control_send(state, ev, client, client->pnn,
     449                                          tevent_timeval_zero(), &request);
     450        if (tevent_req_nomem(subreq, req)) {
     451                return tevent_req_post(req, ev);
     452        }
     453        tevent_req_set_callback(subreq,
     454                                ctdb_client_remove_message_handler_done, req);
     455
     456        return req;
     457}
     458
     459static void ctdb_client_remove_message_handler_done(struct tevent_req *subreq)
     460{
     461        struct tevent_req *req = tevent_req_callback_data(
     462                subreq, struct tevent_req);
     463        struct ctdb_client_remove_message_handler_state *state = tevent_req_data(
     464                req, struct ctdb_client_remove_message_handler_state);
     465        struct ctdb_reply_control *reply;
     466        bool status;
     467        int ret;
     468
     469        status = ctdb_client_control_recv(subreq, &ret, state, &reply);
     470        TALLOC_FREE(subreq);
     471        if (! status) {
     472                tevent_req_error(req, ret);
     473                return;
     474        }
     475
     476        ret = ctdb_reply_control_deregister_srvid(reply);
     477        talloc_free(reply);
     478        if (ret != 0) {
     479                tevent_req_error(req, ret);
     480                return;
     481        }
     482
     483        ret = srvid_deregister(state->client->srv, state->srvid,
     484                               state->private_data);
     485        if (ret != 0) {
     486                tevent_req_error(req, ret);
     487                return;
     488        }
     489
     490        tevent_req_done(req);
     491}
     492
     493bool ctdb_client_remove_message_handler_recv(struct tevent_req *req, int *perr)
     494{
     495        int err;
     496
     497        if (tevent_req_is_unix_error(req, &err)) {
     498                if (perr != NULL) {
     499                        *perr = err;
     500                }
     501                return false;
     502        }
     503        return true;
    193504}
    194505
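
The new handler registration calls pair with the srvid registry in common/srvid.c. A synchronous-style sketch using tevent_req_poll (hedged: the handler body and all register_notify names are hypothetical, the srvid_handler_fn signature is assumed from common/srvid.h, and <errno.h>, tevent and the ctdb client headers are assumed included):

    static void notify_handler(uint64_t srvid, TDB_DATA data,
                               void *private_data)
    {
            /* called for every message delivered to this srvid */
    }

    static int register_notify(TALLOC_CTX *mem_ctx,
                               struct tevent_context *ev,
                               struct ctdb_client_context *client,
                               uint64_t srvid, void *private_data)
    {
            struct tevent_req *req;
            bool status;
            int err;

            req = ctdb_client_set_message_handler_send(mem_ctx, ev, client,
                                                       srvid, notify_handler,
                                                       private_data);
            if (req == NULL) {
                    return ENOMEM;
            }
            if (! tevent_req_poll(req, ev)) {  /* run ev until req is done */
                    talloc_free(req);
                    return EIO;
            }
            status = ctdb_client_set_message_handler_recv(req, &err);
            talloc_free(req);
            return status ? 0 : err;
    }

Deregistration mirrors this with the ctdb_client_remove_message_handler_send/_recv pair, passing the same srvid and private_data.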
  • vendor/current/ctdb/common/rb_tree.c

    r988 r989  
    217217static inline void trbt_set_color(trbt_node_t *node, int color)
    218218{
    219         if ( (node==NULL) && (color==TRBT_BLACK) ) {
     219        if (node == NULL) {
    220220                return;
    221221        }
     
    224224static inline void trbt_set_color_left(trbt_node_t *node, int color)
    225225{
    226         if ( ((node==NULL)||(node->left==NULL)) && (color==TRBT_BLACK) ) {
     226        if (node == NULL || node->left == NULL) {
    227227                return;
    228228        }
     
    231231static inline void trbt_set_color_right(trbt_node_t *node, int color)
    232232{
    233         if ( ((node==NULL)||(node->right==NULL)) && (color==TRBT_BLACK) ) {
     233        if (node == NULL || node->right == NULL) {
    234234                return;
    235235        }
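
For context, the simplified guards above can drop the colour test because of the convention used by the matching getters in rb_tree.c: a NULL node (or NULL child) always reads as black, so declining to set a colour on it is safe for any requested colour, whereas the old code only tolerated TRBT_BLACK and would have dereferenced NULL for other colours. A sketch of that convention (the rb_color field name is assumed from the same file):

    static inline int trbt_get_color(trbt_node_t *node)
    {
            if (node == NULL) {
                    return TRBT_BLACK;   /* NULL leaves count as black */
            }
            return node->rb_color;
    }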
  • vendor/current/ctdb/common/system_linux.c

    r988 r989  
    116116
    117117                /* get the mac address */
    118                 strncpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name)-1);
     118                strlcpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name));
    119119                ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
    120120                if ( ret < 0 ) {
     
    202202
    203203                DEBUG(DEBUG_DEBUG, (__location__ " Created SOCKET FD:%d for sending arp\n", s));
    204                 strncpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
     204                strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
    205205                if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
    206206                        DEBUG(DEBUG_CRIT,(__location__ " interface '%s' not found\n", iface));
     
    210210
    211211                /* get the mac address */
    212                 strncpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name)-1);
     212                strlcpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name));
    213213                ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
    214214                if ( ret < 0 ) {
     
    589589        }
    590590
    591         strncpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name)-1);
     591        strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
    592592        if (ioctl(s, SIOCGIFINDEX, &ifr) < 0 && errno == ENODEV) {
    593593                DEBUG(DEBUG_CRIT,(__location__ " interface '%s' not found\n", iface));
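
The strncpy-to-strlcpy switch above matters because strncpy does not NUL-terminate the destination when the source fills the buffer, while strlcpy always terminates and truncates. A standalone illustration, not ctdb code (strlcpy is a BSD function; Samba carries a replacement in lib/replace for platforms that lack it, and ifr_name is 16 bytes, IFNAMSIZ, on Linux):

    #include <string.h>

    /* Copying an interface name into a fixed ifr_name-sized buffer. */
    void copy_iface(char dst[16], const char *iface)
    {
            /* (a) old pattern: two steps are needed for a guaranteed
             * terminator, and forgetting the second one is the bug the
             * changeset removes */
            strncpy(dst, iface, 16 - 1);
            dst[15] = '\0';

            /* (b) new pattern: always NUL-terminated, overlong names are
             * truncated in a single call */
            strlcpy(dst, iface, 16);
    }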
  • vendor/current/ctdb/config/ctdb.service

    r988 r989  
    11[Unit]
    22Description=CTDB
    3 After=network.target
     3After=network-online.target time-sync.target
    44
    55[Service]
  • vendor/current/ctdb/config/ctdbd_wrapper

    r988 r989  
    182182    if [ -n "$_session" ] ; then
    183183        pkill -9 -s "$_session" 2>/dev/null
    184         rm -f "$pidfile"
    185184    fi
    186185}
  • vendor/current/ctdb/config/events.d/05.system

    r988 r989  
    4848    fi
    4949
    50     if validate_percentage "$_warn_threshold" "$_what" ; then
     50    if validate_percentage "$_warn_threshold" "$_thing" ; then
    5151        if [ "$_usage" -ge "$_warn_threshold" ] ; then
    5252            if [ -r "$_cache" ] ; then
     
    145145END {
    146146    if (memavail != 0) { memfree = memavail ; }
    147     print int((memtotal -  memfree)  / memtotal * 100),
    148           int((swaptotal - swapfree) / swaptotal * 100)
     147    if (memtotal != 0) { print int((memtotal - memfree) / memtotal * 100) ; } else { print 0 ; }
     148    if (swaptotal != 0) { print int((swaptotal - swapfree) / swaptotal * 100) ; } else { print 0 ; }
    149149}')
    150150    _mem_usage="$1"
  • vendor/current/ctdb/doc/ctdb-tunables.7

    r988 r989  
    33.\"    Author:
    44.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
    5 .\"      Date: 01/27/2016
     5.\"      Date: 09/22/2016
    66.\"    Manual: CTDB - clustered TDB database
    77.\"    Source: ctdb
    88.\"  Language: English
    99.\"
    10 .TH "CTDB\-TUNABLES" "7" "01/27/2016" "ctdb" "CTDB \- clustered TDB database"
     10.TH "CTDB\-TUNABLES" "7" "09/22/2016" "ctdb" "CTDB \- clustered TDB database"
    1111.\" -----------------------------------------------------------------
    1212.\" * Define some portability stuff
     
    3838\fBgetvar\fR
    3939commands for more details\&.
    40 .SS "MaxRedirectCount"
     40.PP
     41The tunable variables are listed alphabetically\&.
     42.SS "AllowClientDBAttach"
     43.PP
     44Default: 1
     45.PP
     46When set to 0, clients are not allowed to attach to any databases\&. This can be used to temporarily block any new processes from attaching to and accessing the databases\&. This is mainly used for detaching a volatile database using \*(Aqctdb detach\*(Aq\&.
     47.SS "AllowUnhealthyDBRead"
     48.PP
     49Default: 0
     50.PP
     51When set to 1, ctdb allows database traverses to read unhealthy databases\&. By default, ctdb does not allow reading records from unhealthy databases\&.
     52.SS "ControlTimeout"
     53.PP
     54Default: 60
     55.PP
     56This is the default setting for timeout for when sending a control message to either the local or a remote ctdb daemon\&.
     57.SS "DatabaseHashSize"
     58.PP
     59Default: 100001
     60.PP
     61Number of the hash chains for the local store of the tdbs that ctdb manages\&.
     62.SS "DatabaseMaxDead"
     63.PP
     64Default: 5
     65.PP
     66Maximum number of dead records per hash chain for the tdb databses managed by ctdb\&.
     67.SS "DBRecordCountWarn"
     68.PP
     69Default: 100000
     70.PP
     71When set to non\-zero, ctdb will log a warning during recovery if a database has more than this many records\&. This will produce a warning if a database grows uncontrollably with orphaned records\&.
     72.SS "DBRecordSizeWarn"
     73.PP
     74Default: 10000000
     75.PP
     76When set to non\-zero, ctdb will log a warning during recovery if a single record is bigger than this size\&. This will produce a warning if a database record grows uncontrollably\&.
     77.SS "DBSizeWarn"
     78.PP
     79Default: 1000000000
     80.PP
     81When set to non\-zero, ctdb will log a warning during recovery if a database size is bigger than this\&. This will produce a warning if a database grows uncontrollably\&.
     82.SS "DeferredAttachTO"
     83.PP
     84Default: 120
     85.PP
     86When databases are frozen we do not allow clients to attach to the databases\&. Instead of returning an error immediately to the client, the attach request from the client is deferred until the database becomes available again at which stage we respond to the client\&.
     87.PP
     88This timeout controls how long we will defer the request from the client before timing it out and returning an error to the client\&.
     89.SS "DeterministicIPs"
     90.PP
     91Default: 0
     92.PP
     93When set to 1, ctdb will try to keep public IP addresses locked to specific nodes as far as possible\&. This makes it easier for debugging since you can know that as long as all nodes are healthy public IP X will always be hosted by node Y\&.
     94.PP
     95The cost of using deterministic IP address assignment is that it disables part of the logic where ctdb tries to reduce the number of public IP assignment changes in the cluster\&. This tunable may increase the number of IP failover/failbacks that are performed on the cluster by a small margin\&.
     96.SS "DisableIPFailover"
     97.PP
     98Default: 0
     99.PP
     100When set to non\-zero, ctdb will not perform failover or failback\&. Even if a node fails while holding public IPs, ctdb will not recover the IPs or assign them to another node\&.
     101.PP
     102When this tunable is enabled, ctdb will no longer attempt to recover the cluster by failing IP addresses over to other nodes\&. This leads to a service outage until the administrator has manually performed IP failover to replacement nodes using the \*(Aqctdb moveip\*(Aq command\&.
     103.SS "ElectionTimeout"
    41104.PP
    42105Default: 3
    43106.PP
    44 If we are not the DMASTER and need to fetch a record across the network we first send the request to the LMASTER after which the record is passed onto the current DMASTER\&. If the DMASTER changes before the request has reached that node, the request will be passed onto the "next" DMASTER\&. For very hot records that migrate rapidly across the cluster this can cause a request to "chase" the record for many hops before it catches up with the record\&. this is how many hops we allow trying to chase the DMASTER before we switch back to the LMASTER again to ask for new directions\&.
    45 .PP
    46 When chasing a record, this is how many hops we will chase the record for before going back to the LMASTER to ask for new guidance\&.
    47 .SS "SeqnumInterval"
     107The number of seconds to wait for the election of recovery master to complete\&. If the election is not completed during this interval, then that round of election fails and ctdb starts a new election\&.
     108.SS "EnableBans"
     109.PP
     110Default: 1
     111.PP
     112This parameter allows ctdb to ban a node if the node is misbehaving\&.
     113.PP
     114When set to 0, this disables banning completely in the cluster and thus nodes can not get banned, even it they break\&. Don\*(Aqt set to 0 unless you know what you are doing\&. You should set this to the same value on all nodes to avoid unexpected behaviour\&.
     115.SS "EventScriptTimeout"
     116.PP
     117Default: 30
     118.PP
     119Maximum time in seconds to allow an event to run before timing out\&. This is the total time for all enabled scripts that are run for an event, not just a single event script\&.
     120.PP
     121Note that timeouts are ignored for some events ("takeip", "releaseip", "startrecovery", "recovered") and converted to success\&. The logic here is that the callers of these events implement their own additional timeout\&.
     122.SS "FetchCollapse"
     123.PP
     124Default: 1
     125.PP
     126This parameter is used to avoid multiple migration requests for the same record from a single node\&. All the record requests for the same record are queued up and processed when the record is migrated to the current node\&.
     127.PP
     128When many clients across many nodes try to access the same record at the same time this can lead to a fetch storm where the record becomes very active and bounces between nodes very fast\&. This leads to high CPU utilization of the ctdbd daemon, trying to bounce that record around very fast, and poor performance\&. This can improve performance and reduce CPU utilization for certain workloads\&.
     129.SS "HopcountMakeSticky"
     130.PP
     131Default: 50
     132.PP
     133For database(s) marked STICKY (using \*(Aqctdb setdbsticky\*(Aq), any record that is migrating so fast that hopcount exceeds this limit is marked as STICKY record for
     134\fIStickyDuration\fR
     135seconds\&. This means that after each migration the sticky record will be kept on the node
     136\fIStickyPindown\fRmilliseconds and prevented from being migrated off the node\&.
     137.PP
     138This will improve performance for certain workloads, such as locking\&.tdb if many clients are opening/closing the same file concurrently\&.
     139.SS "KeepaliveInterval"
     140.PP
     141Default: 5
     142.PP
     143How often in seconds should the nodes send keep\-alive packets to each other\&.
     144.SS "KeepaliveLimit"
     145.PP
     146Default: 5
     147.PP
     148After how many keepalive intervals without any traffic should a node wait until marking the peer as DISCONNECTED\&.
     149.PP
     150If a node has hung, it can take
     151\fIKeepaliveInterval\fR
     152* (\fIKeepaliveLimit\fR
     153+ 1) seconds before ctdb determines that the node is DISCONNECTED and performs a recovery\&. This limit should not be set too high to enable early detection and avoid any application timeouts (e\&.g\&. SMB1) to kick in before the fail over is completed\&.
     154.SS "LCP2PublicIPs"
     155.PP
     156Default: 1
     157.PP
     158When set to 1, ctdb uses the LCP2 ip allocation algorithm\&.
     159.SS "LockProcessesPerDB"
     160.PP
     161Default: 200
     162.PP
     163This is the maximum number of lock helper processes ctdb will create for obtaining record locks\&. When ctdb cannot get a record lock without blocking, it creates a helper process that waits for the lock to be obtained\&.
     164.SS "LogLatencyMs"
     165.PP
     166Default: 0
     167.PP
     168When set to non\-zero, ctdb will log if certains operations take longer than this value, in milliseconds, to complete\&. These operations include "process a record request from client", "take a record or database lock", "update a persistent database record" and "vaccum a database"\&.
     169.SS "MaxQueueDropMsg"
     170.PP
     171Default: 1000000
     172.PP
     173This is the maximum number of messages to be queued up for a client before ctdb will treat the client as hung and will terminate the client connection\&.
     174.SS "MonitorInterval"
     175.PP
     176Default: 15
     177.PP
     178How often should ctdb run the \*(Aqmonitor\*(Aq event in seconds to check for a node\*(Aqs health\&.
     179.SS "MonitorTimeoutCount"
     180.PP
     181Default: 20
     182.PP
     183How many \*(Aqmonitor\*(Aq events in a row need to timeout before a node is flagged as UNHEALTHY\&. This setting is useful if scripts can not be written so that they do not hang for benign reasons\&.
     184.SS "NoIPFailback"
     185.PP
     186Default: 0
     187.PP
     188When set to 1, ctdb will not perform failback of IP addresses when a node becomes healthy\&. When a node becomes UNHEALTHY, ctdb WILL perform failover of public IP addresses, but when the node becomes HEALTHY again, ctdb will not fail the addresses back\&.
     189.PP
     190Use with caution! Normally when a node becomes available to the cluster ctdb will try to reassign public IP addresses onto the new node as a way to distribute the workload evenly across the clusternode\&. Ctdb tries to make sure that all running nodes have approximately the same number of public addresses it hosts\&.
     191.PP
     192When you enable this tunable, ctdb will no longer attempt to rebalance the cluster by failing IP addresses back to the new nodes\&. An unbalanced cluster will therefore remain unbalanced until there is manual intervention from the administrator\&. When this parameter is set, you can manually fail public IP addresses over to the new node(s) using the \*(Aqctdb moveip\*(Aq command\&.
     193.SS "NoIPHostOnAllDisabled"
     194.PP
     195Default: 0
     196.PP
     197If no nodes are HEALTHY then by default ctdb will happily host public IPs on disabled (unhealthy or administratively disabled) nodes\&. This can cause problems, for example if the underlying cluster filesystem is not mounted\&. When set to 1 on a node and that node is disabled, any IPs hosted by this node will be released and the node will not takeover any IPs until it is no longer disabled\&.
     198.SS "NoIPTakeover"
     199.PP
     200Default: 0
     201.PP
     202When set to 1, ctdb will not allow IP addresses to be failed over onto this node\&. Any IP addresses that the node currently hosts will remain on the node but no new IP addresses can be failed over to the node\&.
     203.SS "PullDBPreallocation"
     204.PP
     205Default: 10*1024*1024
     206.PP
     207This is the size of a record buffer to pre\-allocate for sending reply to PULLDB control\&. Usually record buffer starts with size of the first record and gets reallocated every time a new record is added to the record buffer\&. For a large number of records, this can be very inefficient to grow the record buffer one record at a time\&.
     208.SS "RecBufferSizeLimit"
     209.PP
     210Default: 1000000
     211.PP
     212This is the limit on the size of the record buffer to be sent in various controls\&. This limit is used by new controls used for recovery and controls used in vacuuming\&.
     213.SS "RecdFailCount"
     214.PP
     215Default: 10
     216.PP
     217If the recovery daemon has failed to ping the main dameon for this many consecutive intervals, the main daemon will consider the recovery daemon as hung and will try to restart it to recover\&.
     218.SS "RecdPingTimeout"
     219.PP
     220Default: 60
     221.PP
     222If the main dameon has not heard a "ping" from the recovery dameon for this many seconds, the main dameon will log a message that the recovery daemon is potentially hung\&. This also increments a counter which is checked against
     223\fIRecdFailCount\fR
     224for detection of hung recovery daemon\&.
     225.SS "RecLockLatencyMs"
    48226.PP
    49227Default: 1000
    50228.PP
    51 Some databases have seqnum tracking enabled, so that samba will be able to detect asynchronously when there has been updates to the database\&. Everytime a database is updated its sequence number is increased\&.
    52 .PP
    53 This tunable is used to specify in \*(Aqms\*(Aq how frequently ctdb will send out updates to remote nodes to inform them that the sequence number is increased\&.
    54 .SS "ControlTimeout"
    55 .PP
    56 Default: 60
    57 .PP
    58 This is the default setting for timeout for when sending a control message to either the local or a remote ctdb daemon\&.
    59 .SS "TraverseTimeout"
    60 .PP
    61 Default: 20
    62 .PP
    63 This setting controls how long we allow a traverse process to run\&. After this timeout triggers, the main ctdb daemon will abort the traverse if it has not yet finished\&.
    64 .SS "KeepaliveInterval"
    65 .PP
    66 Default: 5
    67 .PP
    68 How often in seconds should the nodes send keepalives to eachother\&.
    69 .SS "KeepaliveLimit"
    70 .PP
    71 Default: 5
    72 .PP
    73 After how many keepalive intervals without any traffic should a node wait until marking the peer as DISCONNECTED\&.
    74 .PP
    75 If a node has hung, it can thus take KeepaliveInterval*(KeepaliveLimit+1) seconds before we determine that the node is DISCONNECTED and that we require a recovery\&. This limitshould not be set too high since we want a hung node to be detectec, and expunged from the cluster well before common CIFS timeouts (45\-90 seconds) kick in\&.
     229When using a reclock file for split brain prevention, if set to non\-zero this tunable will make the recovery dameon log a message if the fcntl() call to lock/testlock the recovery file takes longer than this number of milliseconds\&.
     230.SS "RecoverInterval"
     231.PP
     232Default: 1
     233.PP
     234How frequently in seconds should the recovery daemon perform the consistency checks to determine if it should perform a recovery\&.
     235.SS "RecoverPDBBySeqNum"
     236.PP
     237Default: 1
     238.PP
     239When set to zero, database recovery for persistent databases is record\-by\-record and recovery process simply collects the most recent version of every individual record\&.
     240.PP
     241When set to non\-zero, persistent databases will instead be recovered as a whole db and not by individual records\&. The node that contains the highest value stored in the record "__db_sequence_number__" is selected and the copy of that nodes database is used as the recovered database\&.
     242.PP
     243By default, recovery of persistent databses is done using __db_sequence_number__ record\&.
    76244.SS "RecoverTimeout"
    77245.PP
    78 Default: 20
     246Default: 120
    79247.PP
    80248This is the default setting for timeouts for controls when sent from the recovery daemon\&. We allow longer control timeouts from the recovery daemon than from normal use since the recovery dameon often use controls that can take a lot longer than normal controls\&.
    81 .SS "RecoverInterval"
    82 .PP
    83 Default: 1
    84 .PP
    85 How frequently in seconds should the recovery daemon perform the consistency checks that determine if we need to perform a recovery or not\&.
    86 .SS "ElectionTimeout"
    87 .PP
    88 Default: 3
    89 .PP
    90 When electing a new recovery master, this is how many seconds we allow the election to take before we either deem the election finished or we fail the election and start a new one\&.
    91 .SS "TakeoverTimeout"
    92 .PP
    93 Default: 9
    94 .PP
    95 This is how many seconds we allow controls to take for IP failover events\&.
    96 .SS "MonitorInterval"
    97 .PP
    98 Default: 15
    99 .PP
    100 How often should ctdb run the event scripts to check for a nodes health\&.
    101 .SS "TickleUpdateInterval"
    102 .PP
    103 Default: 20
    104 .PP
    105 How often will ctdb record and store the "tickle" information used to kickstart stalled tcp connections after a recovery\&.
    106 .SS "EventScriptTimeout"
    107 .PP
    108 Default: 30
    109 .PP
    110 Maximum time in seconds to allow an event to run before timing out\&. This is the total time for all enabled scripts that are run for an event, not just a single event script\&.
    111 .PP
    112 Note that timeouts are ignored for some events ("takeip", "releaseip", "startrecovery", "recovered") and converted to success\&. The logic here is that the callers of these events implement their own additional timeout\&.
    113 .SS "MonitorTimeoutCount"
    114 .PP
    115 Default: 20
    116 .PP
    117 How many monitor events in a row need to timeout before a node is flagged as UNHEALTHY\&. This setting is useful if scripts can not be written so that they do not hang for benign reasons\&.
     249.SS "RecoveryBanPeriod"
     250.PP
     251Default: 300
     252.PP
     253The duration in seconds for which a node is banned if the node fails during recovery\&. After this time has elapsed the node will automatically get unbanned and will attempt to rejoin the cluster\&.
     254.PP
     255A node usually gets banned due to real problems with the node\&. Don\*(Aqt set this value too small\&. Otherwise, a problematic node will try to re\-join cluster too soon causing unnecessary recoveries\&.
     256.SS "RecoveryDropAllIPs"
     257.PP
     258Default: 120
     259.PP
     260If a node is stuck in recovery, or stopped, or banned, for this many seconds, then ctdb will release all public addresses on that node\&.
    118261.SS "RecoveryGracePeriod"
    119262.PP
    120263Default: 120
    121264.PP
    122 During recoveries, if a node has not caused recovery failures during the last grace period, any records of transgressions that the node has caused recovery failures will be forgiven\&. This resets the ban\-counter back to zero for that node\&.
    123 .SS "RecoveryBanPeriod"
    124 .PP
    125 Default: 300
    126 .PP
    127 If a node becomes banned causing repetitive recovery failures\&. The node will eventually become banned from the cluster\&. This controls how long the culprit node will be banned from the cluster before it is allowed to try to join the cluster again\&. Don\*(Aqt set to small\&. A node gets banned for a reason and it is usually due to real problems with the node\&.
    128 .SS "DatabaseHashSize"
    129 .PP
    130 Default: 100001
    131 .PP
    132 Size of the hash chains for the local store of the tdbs that ctdb manages\&.
    133 .SS "DatabaseMaxDead"
    134 .PP
    135 Default: 5
    136 .PP
    137 How many dead records per hashchain in the TDB database do we allow before the freelist needs to be processed\&.
    138 .SS "RerecoveryTimeout"
    139 .PP
    140 Default: 10
    141 .PP
    142 Once a recovery has completed, no additional recoveries are permitted until this timeout has expired\&.
    143 .SS "EnableBans"
    144 .PP
    145 Default: 1
    146 .PP
    147 When set to 0, this disables BANNING completely in the cluster and thus nodes can not get banned, even it they break\&. Don\*(Aqt set to 0 unless you know what you are doing\&. You should set this to the same value on all nodes to avoid unexpected behaviour\&.
    148 .SS "DeterministicIPs"
    149 .PP
    150 Default: 0
    151 .PP
    152 When enabled, this tunable makes ctdb try to keep public IP addresses locked to specific nodes as far as possible\&. This makes it easier for debugging since you can know that as long as all nodes are healthy public IP X will always be hosted by node Y\&.
    153 .PP
    154 The cost of using deterministic IP address assignment is that it disables part of the logic where ctdb tries to reduce the number of public IP assignment changes in the cluster\&. This tunable may increase the number of IP failover/failbacks that are performed on the cluster by a small margin\&.
    155 .SS "LCP2PublicIPs"
    156 .PP
    157 Default: 1
    158 .PP
    159 When enabled this switches ctdb to use the LCP2 ip allocation algorithm\&.
    160 .SS "ReclockPingPeriod"
    161 .PP
    162 Default: x
    163 .PP
    164 Obsolete
    165 .SS "NoIPFailback"
    166 .PP
    167 Default: 0
    168 .PP
    169 When set to 1, ctdb will not perform failback of IP addresses when a node becomes healthy\&. Ctdb WILL perform failover of public IP addresses when a node becomes UNHEALTHY, but when the node becomes HEALTHY again, ctdb will not fail the addresses back\&.
    170 .PP
    171 Use with caution! Normally when a node becomes available to the cluster ctdb will try to reassign public IP addresses onto the new node as a way to distribute the workload evenly across the clusternode\&. Ctdb tries to make sure that all running nodes have approximately the same number of public addresses it hosts\&.
    172 .PP
    173 When you enable this tunable, CTDB will no longer attempt to rebalance the cluster by failing IP addresses back to the new nodes\&. An unbalanced cluster will therefore remain unbalanced until there is manual intervention from the administrator\&. When this parameter is set, you can manually fail public IP addresses over to the new node(s) using the \*(Aqctdb moveip\*(Aq command\&.
    174 .SS "DisableIPFailover"
    175 .PP
    176 Default: 0
    177 .PP
    178 When enabled, ctdb will not perform failover or failback\&. Even if a node fails while holding public IPs, ctdb will not recover the IPs or assign them to another node\&.
    179 .PP
    180 When you enable this tunable, CTDB will no longer attempt to recover the cluster by failing IP addresses over to other nodes\&. This leads to a service outage until the administrator has manually performed failover to replacement nodes using the \*(Aqctdb moveip\*(Aq command\&.
    181 .SS "NoIPTakeover"
    182 .PP
    183 Default: 0
    184 .PP
    185 When set to 1, ctdb will not allow IP addresses to be failed over onto this node\&. Any IP addresses that the node currently hosts will remain on the node but no new IP addresses can be failed over to the node\&.
    186 .SS "NoIPHostOnAllDisabled"
    187 .PP
    188 Default: 0
    189 .PP
    190 If no nodes are healthy then by default ctdb will happily host public IPs on disabled (unhealthy or administratively disabled) nodes\&. This can cause problems, for example if the underlying cluster filesystem is not mounted\&. When set to 1 on a node and that node is disabled it, any IPs hosted by this node will be released and the node will not takeover any IPs until it is no longer disabled\&.
    191 .SS "DBRecordCountWarn"
    192 .PP
    193 Default: 100000
    194 .PP
    195 When set to non\-zero, ctdb will log a warning when we try to recover a database with more than this many records\&. This will produce a warning if a database grows uncontrollably with orphaned records\&.
    196 .SS "DBRecordSizeWarn"
    197 .PP
    198 Default: 10000000
    199 .PP
    200 When set to non\-zero, ctdb will log a warning when we try to recover a database where a single record is bigger than this\&. This will produce a warning if a database record grows uncontrollably with orphaned sub\-records\&.
    201 .SS "DBSizeWarn"
    202 .PP
    203 Default: 1000000000
    204 .PP
    205 When set to non\-zero, ctdb will log a warning when we try to recover a database bigger than this\&. This will produce a warning if a database grows uncontrollably\&.
    206 .SS "VerboseMemoryNames"
    207 .PP
    208 Default: 0
    209 .PP
    210 This feature consumes additional memory\&. when used the talloc library will create more verbose names for all talloc allocated objects\&.
    211 .SS "RecdPingTimeout"
    212 .PP
    213 Default: 60
    214 .PP
    215 If the main dameon has not heard a "ping" from the recovery dameon for this many seconds, the main dameon will log a message that the recovery daemon is potentially hung\&.
    216 .SS "RecdFailCount"
    217 .PP
    218 Default: 10
    219 .PP
    220 If the recovery daemon has failed to ping the main dameon for this many consecutive intervals, the main daemon will consider the recovery daemon as hung and will try to restart it to recover\&.
    221 .SS "LogLatencyMs"
    222 .PP
    223 Default: 0
    224 .PP
    225 When set to non\-zero, this will make the main daemon log any operation that took longer than this value, in \*(Aqms\*(Aq, to complete\&. These include "how long time a lockwait child process needed", "how long time to write to a persistent database" but also "how long did it take to get a response to a CALL from a remote node"\&.
    226 .SS "RecLockLatencyMs"
    227 .PP
    228 Default: 1000
    229 .PP
    230 When using a reclock file for split brain prevention, if set to non\-zero this tunable will make the recovery dameon log a message if the fcntl() call to lock/testlock the recovery file takes longer than this number of ms\&.
    231 .SS "RecoveryDropAllIPs"
    232 .PP
    233 Default: 120
    234 .PP
    235 If we have been stuck in recovery, or stopped, or banned, mode for this many seconds we will force drop all held public addresses\&.
    236 .SS "VacuumInterval"
    237 .PP
    238 Default: 10
    239 .PP
    240 Periodic interval in seconds when vacuuming is triggered for volatile databases\&.
    241 .SS "VacuumMaxRunTime"
    242 .PP
    243 Default: 120
    244 .PP
    245 The maximum time in seconds for which the vacuuming process is allowed to run\&. If vacuuming process takes longer than this value, then the vacuuming process is terminated\&.
     265During recoveries, if a node has not caused recovery failures during the last grace period in seconds, any records of transgressions that the node has caused recovery failures will be forgiven\&. This resets the ban\-counter back to zero for that node\&.
    246266.SS "RepackLimit"
    247267.PP
     
    249269.PP
    250270During vacuuming, if the number of freelist records are more than
    251 \fIRepackLimit\fR, then databases are repacked to get rid of the freelist records to avoid fragmentation\&.
     271\fIRepackLimit\fR, then the database is repacked to get rid of the freelist records to avoid fragmentation\&.
    252272.PP
    253273Databases are repacked only if both
     
    256276\fIVacuumLimit\fR
    257277are exceeded\&.
     278.SS "RerecoveryTimeout"
     279.PP
     280Default: 10
     281.PP
     282Once a recovery has completed, no additional recoveries are permitted until this timeout in seconds has expired\&.
     283.SS "Samba3AvoidDeadlocks"
     284.PP
     285Default: 0
     286.PP
     287If set to non\-zero, enable code that prevents deadlocks with Samba (only for Samba 3\&.x)\&.
     288.PP
     289This should be set to 1 only when using Samba version 3\&.x to enable special code in ctdb to avoid deadlock with Samba version 3\&.x\&. This code is not required for Samba version 4\&.x and must not be enabled for Samba 4\&.x\&.
     290.SS "SeqnumInterval"
     291.PP
     292Default: 1000
     293.PP
     294Some databases have seqnum tracking enabled, so that samba will be able to detect asynchronously when there has been updates to the database\&. Everytime a database is updated its sequence number is increased\&.
     295.PP
     296This tunable is used to specify in milliseconds how frequently ctdb will send out updates to remote nodes to inform them that the sequence number is increased\&.
     297.SS "StatHistoryInterval"
     298.PP
     299Default: 1
     300.PP
     301Granularity of the statistics collected in the statistics history\&. This is reported by \*(Aqctdb stats\*(Aq command\&.
     302.SS "StickyDuration"
     303.PP
     304Default: 600
     305.PP
     306Once a record has been marked STICKY, this is the duration in seconds, the record will be flagged as a STICKY record\&.
     307.SS "StickyPindown"
     308.PP
     309Default: 200
     310.PP
     311Once a STICKY record has been migrated onto a node, it will be pinned down on that node for this number of milliseconds\&. Any request from other nodes to migrate the record off the node will be deferred\&.
     312.SS "TakeoverTimeout"
     313.PP
     314Default: 9
     315.PP
     316This is the duration in seconds in which ctdb tries to complete IP failover\&.
     317.SS "TDBMutexEnabled"
     318.PP
     319Default: 0
     320.PP
     321This paramter enables TDB_MUTEX_LOCKING feature on volatile databases if the robust mutexes are supported\&. This optimizes the record locking using robust mutexes and is much more efficient that using posix locks\&.
     322.SS "TickleUpdateInterval"
     323.PP
     324Default: 20
     325.PP
     326Every
     327\fITickleUpdateInterval\fR
     328seconds, ctdb synchronizes the client connection information across nodes\&.
     329.SS "TraverseTimeout"
     330.PP
     331Default: 20
     332.PP
     333This is the duration in seconds for which a database traverse is allowed to run\&. If the traverse does not complete during this interval, ctdb will abort the traverse\&.
     334.SS "VacuumFastPathCount"
     335.PP
     336Default: 60
     337.PP
     338During a vacuuming run, ctdb usually processes only the records marked for deletion also called the fast path vacuuming\&. After finishing
     339\fIVacuumFastPathCount\fR
     340number of fast path vacuuming runs, ctdb will trigger a scan of complete database for any empty records that need to be deleted\&.
     341.SS "VacuumInterval"
     342.PP
     343Default: 10
     344.PP
     345Periodic interval in seconds when vacuuming is triggered for volatile databases\&.
    258346.SS "VacuumLimit"
    259347.PP
     
    268356\fIVacuumLimit\fR
    269357are exceeded\&.
    270 .SS "VacuumFastPathCount"
    271 .PP
    272 Default: 60
    273 .PP
    274 When a record is deleted, it is marked for deletion during vacuuming\&. Vacuuming process usually processes this list to purge the records from the database\&. If the number of records marked for deletion are more than VacuumFastPathCount, then vacuuming process will scan the complete database for empty records instead of using the list of records marked for deletion\&.
    275 .SS "DeferredAttachTO"
    276 .PP
    277 Default: 120
    278 .PP
    279 When databases are frozen we do not allow clients to attach to the databases\&. Instead of returning an error immediately to the application the attach request from the client is deferred until the database becomes available again at which stage we respond to the client\&.
    280 .PP
    281 This timeout controls how long we will defer the request from the client before timing it out and returning an error to the client\&.
    282 .SS "HopcountMakeSticky"
    283 .PP
    284 Default: 50
    285 .PP
    286 If the database is set to \*(AqSTICKY\*(Aq mode, using the \*(Aqctdb setdbsticky\*(Aq command, any record that is seen as very hot and migrating so fast that hopcount surpasses 50 is set to become a STICKY record for StickyDuration seconds\&. This means that after each migration the record will be kept on the node and prevented from being migrated off the node\&.
    287 .PP
    288 This setting allows one to try to identify such records and stop them from migrating across the cluster so fast\&. This will improve performance for certain workloads, such as locking\&.tdb if many clients are opening/closing the same file concurrently\&.
    289 .SS "StickyDuration"
    290 .PP
    291 Default: 600
    292 .PP
    293 Once a record has been found to be fetch\-lock hot and has been flagged to become STICKY, this is for how long, in seconds, the record will be flagged as a STICKY record\&.
    294 .SS "StickyPindown"
    295 .PP
    296 Default: 200
    297 .PP
    298 Once a STICKY record has been migrated onto a node, it will be pinned down on that node for this number of ms\&. Any request from other nodes to migrate the record off the node will be deferred until the pindown timer expires\&.
    299 .SS "StatHistoryInterval"
    300 .PP
    301 Default: 1
    302 .PP
    303 Granularity of the statistics collected in the statistics history\&.
    304 .SS "AllowClientDBAttach"
    305 .PP
    306 Default: 1
    307 .PP
    308 When set to 0, clients are not allowed to attach to any databases\&. This can be used to temporarily block any new processes from attaching to and accessing the databases\&.
    309 .SS "RecoverPDBBySeqNum"
    310 .PP
    311 Default: 1
    312 .PP
    313 When set to zero, database recovery for persistent databases is record\-by\-record and recovery process simply collects the most recent version of every individual record\&.
    314 .PP
    315 When set to non\-zero, persistent databases will instead be recovered as a whole db and not by individual records\&. The node that contains the highest value stored in the record "__db_sequence_number__" is selected and the copy of that nodes database is used as the recovered database\&.
    316 .PP
    317 By default, recovery of persistent databses is done using __db_sequence_number__ record\&.
    318 .SS "FetchCollapse"
    319 .PP
    320 Default: 1
    321 .PP
    322 When many clients across many nodes try to access the same record at the same time this can lead to a fetch storm where the record becomes very active and bounces between nodes very fast\&. This leads to high CPU utilization of the ctdbd daemon, trying to bounce that record around very fast, and poor performance\&.
    323 .PP
    324 This parameter is used to activate a fetch\-collapse\&. A fetch\-collapse is when we track which records we have requests in flight so that we only keep one request in flight from a certain node, even if multiple smbd processes are attemtping to fetch the record at the same time\&. This can improve performance and reduce CPU utilization for certain workloads\&.
    325 .PP
    326 This timeout controls if we should collapse multiple fetch operations of the same record into a single request and defer all duplicates or not\&.
    327 .SS "Samba3AvoidDeadlocks"
    328 .PP
    329 Default: 0
    330 .PP
    331 Enable code that prevents deadlocks with Samba (only for Samba 3\&.x)\&.
    332 .PP
    333 This should be set to 1 when using Samba version 3\&.x to enable special code in CTDB to avoid deadlock with Samba version 3\&.x\&. This code is not required for Samba version 4\&.x and must not be enabled for Samba 4\&.x\&.
     358.SS "VacuumMaxRunTime"
     359.PP
     360Default: 120
     361.PP
      362The maximum time in seconds for which the vacuuming process is allowed to run\&. If the vacuuming process takes longer than this value, it is terminated\&.
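For instance, on slow storage one might allow vacuuming more time before it is terminated (a hedged sketch; the value is illustrative):

    ctdb setvar VacuumMaxRunTime 240   # allow up to 4 minutes per vacuuming run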
     363.SS "VerboseMemoryNames"
     364.PP
     365Default: 0
     366.PP
     367When set to non\-zero, ctdb assigns verbose names for some of the talloc allocated memory objects\&. These names are visible in the talloc memory report generated by \*(Aqctdb dumpmemory\*(Aq\&.
    334368.SH "SEE ALSO"
    335369.PP
  • vendor/current/ctdb/doc/ctdb-tunables.7.html

    r988 r989  
    1 <html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>ctdb-tunables</title><meta name="generator" content="DocBook XSL Stylesheets V1.78.1"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="refentry"><a name="ctdb-tunables.7"></a><div class="titlepage"></div><div class="refnamediv"><h2>Name</h2><p>ctdb-tunables &#8212; CTDB tunable configuration variables</p></div><div class="refsect1"><a name="idp52032112"></a><h2>DESCRIPTION</h2><p>
     1<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>ctdb-tunables</title><meta name="generator" content="DocBook XSL Stylesheets V1.78.1"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="refentry"><a name="ctdb-tunables.7"></a><div class="titlepage"></div><div class="refnamediv"><h2>Name</h2><p>ctdb-tunables &#8212; CTDB tunable configuration variables</p></div><div class="refsect1"><a name="idp51068080"></a><h2>DESCRIPTION</h2><p>
    22      CTDB's behaviour can be configured by setting run-time tunable
    33      variables.  This lists and describes all tunables.  See the
     
    55      <span class="command"><strong>listvars</strong></span>, <span class="command"><strong>setvar</strong></span> and
    66      <span class="command"><strong>getvar</strong></span> commands for more details.
    7     </p><div class="refsect2"><a name="idp52844128"></a><h3>MaxRedirectCount</h3><p>Default: 3</p><p>
    8         If we are not the DMASTER and need to fetch a record across the network
    9         we first send the request to the LMASTER after which the record
    10         is passed onto the current DMASTER. If the DMASTER changes before
    11         the request has reached that node, the request will be passed onto the
    12         "next" DMASTER. For very hot records that migrate rapidly across the
    13         cluster this can cause a request to "chase" the record for many hops
    14         before it catches up with the record.
    15 
    16         this is how many hops we allow trying to chase the DMASTER before we
    17         switch back to the LMASTER again to ask for new directions.
    18       </p><p>
    19         When chasing a record, this is how many hops we will chase the record
    20         for before going back to the LMASTER to ask for new guidance.
    21       </p></div><div class="refsect2"><a name="idp52639696"></a><h3>SeqnumInterval</h3><p>Default: 1000</p><p>
    22         Some databases have seqnum tracking enabled, so that samba will be able
    23         to detect asynchronously when there has been updates to the database.
    24         Everytime a database is updated its sequence number is increased.
    25       </p><p>
    26         This tunable is used to specify in 'ms' how frequently ctdb will
    27         send out updates to remote nodes to inform them that the sequence
    28         number is increased.
    29       </p></div><div class="refsect2"><a name="idp52023488"></a><h3>ControlTimeout</h3><p>Default: 60</p><p>
    30         This is the default
    31         setting for timeout for when sending a control message to either the
    32         local or a remote ctdb daemon.
    33       </p></div><div class="refsect2"><a name="idp51243376"></a><h3>TraverseTimeout</h3><p>Default: 20</p><p>
    34         This setting controls how long we allow a traverse process to run.
    35         After this timeout triggers, the main ctdb daemon will abort the
    36         traverse if it has not yet finished.
    37       </p></div><div class="refsect2"><a name="idp50157008"></a><h3>KeepaliveInterval</h3><p>Default: 5</p><p>
    38         How often in seconds should the nodes send keepalives to eachother.
    39       </p></div><div class="refsect2"><a name="idp49234000"></a><h3>KeepaliveLimit</h3><p>Default: 5</p><p>
    40         After how many keepalive intervals without any traffic should a node
    41         wait until marking the peer as DISCONNECTED.
    42       </p><p>
    43         If a node has hung, it can thus take KeepaliveInterval*(KeepaliveLimit+1)
    44         seconds before we determine that the node is DISCONNECTED and that we
    45         require a recovery. This limitshould not be set too high since we want
    46         a hung node to be detectec, and expunged from the cluster well before
    47         common CIFS timeouts (45-90 seconds) kick in.
    48       </p></div><div class="refsect2"><a name="idp53887184"></a><h3>RecoverTimeout</h3><p>Default: 20</p><p>
    49         This is the default setting for timeouts for controls when sent from the
    50         recovery daemon. We allow longer control timeouts from the recovery daemon
    51         than from normal use since the recovery dameon often use controls that
    52         can take a lot longer than normal controls.
    53       </p></div><div class="refsect2"><a name="idp53889072"></a><h3>RecoverInterval</h3><p>Default: 1</p><p>
    54         How frequently in seconds should the recovery daemon perform the
    55         consistency checks that determine if we need to perform a recovery or not.
    56       </p></div><div class="refsect2"><a name="idp53890832"></a><h3>ElectionTimeout</h3><p>Default: 3</p><p>
    57         When electing a new recovery master, this is how many seconds we allow
    58         the election to take before we either deem the election finished
    59         or we fail the election and start a new one.
    60       </p></div><div class="refsect2"><a name="idp53892640"></a><h3>TakeoverTimeout</h3><p>Default: 9</p><p>
    61         This is how many seconds we allow controls to take for IP failover events.
    62       </p></div><div class="refsect2"><a name="idp53894240"></a><h3>MonitorInterval</h3><p>Default: 15</p><p>
    63         How often should ctdb run the event scripts to check for a nodes health.
    64       </p></div><div class="refsect2"><a name="idp53895840"></a><h3>TickleUpdateInterval</h3><p>Default: 20</p><p>
    65         How often will ctdb record and store the "tickle" information used to
    66         kickstart stalled tcp connections after a recovery.
    67       </p></div><div class="refsect2"><a name="idp53897584"></a><h3>EventScriptTimeout</h3><p>Default: 30</p><p>
     7    </p><p>
     8      The tunable variables are listed alphabetically.
     9    </p><div class="refsect2"><a name="idp51120048"></a><h3>AllowClientDBAttach</h3><p>Default: 1</p><p>
     10        When set to 0, clients are not allowed to attach to any databases.
     11        This can be used to temporarily block any new processes from
     12        attaching to and accessing the databases.  This is mainly used
     13        for detaching a volatile database using 'ctdb detach'.
     14      </p></div><div class="refsect2"><a name="idp53889776"></a><h3>AllowUnhealthyDBRead</h3><p>Default: 0</p><p>
     15        When set to 1, ctdb allows database traverses to read unhealthy
     16        databases.  By default, ctdb does not allow reading records from
     17        unhealthy databases.
     18      </p></div><div class="refsect2"><a name="idp54131312"></a><h3>ControlTimeout</h3><p>Default: 60</p><p>
     19        This is the default setting for timeout for when sending a
     20        control message to either the local or a remote ctdb daemon.
     21      </p></div><div class="refsect2"><a name="idp51364816"></a><h3>DatabaseHashSize</h3><p>Default: 100001</p><p>
     22        Number of the hash chains for the local store of the tdbs that
     23        ctdb manages.
     24      </p></div><div class="refsect2"><a name="idp53157488"></a><h3>DatabaseMaxDead</h3><p>Default: 5</p><p>
      25        Maximum number of dead records per hash chain for the tdb databases
     26        managed by ctdb.
     27      </p></div><div class="refsect2"><a name="idp50010288"></a><h3>DBRecordCountWarn</h3><p>Default: 100000</p><p>
     28        When set to non-zero, ctdb will log a warning during recovery if
     29        a database has more than this many records. This will produce a
     30        warning if a database grows uncontrollably with orphaned records.
     31      </p></div><div class="refsect2"><a name="idp49085760"></a><h3>DBRecordSizeWarn</h3><p>Default: 10000000</p><p>
     32        When set to non-zero, ctdb will log a warning during recovery
     33        if a single record is bigger than this size. This will produce
     34        a warning if a database record grows uncontrollably.
     35      </p></div><div class="refsect2"><a name="idp49087568"></a><h3>DBSizeWarn</h3><p>Default: 1000000000</p><p>
     36        When set to non-zero, ctdb will log a warning during recovery if
     37        a database size is bigger than this. This will produce a warning
     38        if a database grows uncontrollably.
     39      </p></div><div class="refsect2"><a name="idp49089360"></a><h3>DeferredAttachTO</h3><p>Default: 120</p><p>
     40        When databases are frozen we do not allow clients to attach to
     41        the databases. Instead of returning an error immediately to the
     42        client, the attach request from the client is deferred until
     43        the database becomes available again at which stage we respond
     44        to the client.
     45      </p><p>
     46        This timeout controls how long we will defer the request from the
     47        client before timing it out and returning an error to the client.
     48      </p></div><div class="refsect2"><a name="idp54043296"></a><h3>DeterministicIPs</h3><p>Default: 0</p><p>
     49        When set to 1, ctdb will try to keep public IP addresses locked
     50        to specific nodes as far as possible. This makes it easier
     51        for debugging since you can know that as long as all nodes are
     52        healthy public IP X will always be hosted by node Y.
     53      </p><p>
     54        The cost of using deterministic IP address assignment is that it
     55        disables part of the logic where ctdb tries to reduce the number
     56        of public IP assignment changes in the cluster. This tunable may
     57        increase the number of IP failover/failbacks that are performed
     58        on the cluster by a small margin.
     59      </p></div><div class="refsect2"><a name="idp54045872"></a><h3>DisableIPFailover</h3><p>Default: 0</p><p>
     60        When set to non-zero, ctdb will not perform failover or
     61        failback. Even if a node fails while holding public IPs, ctdb
     62        will not recover the IPs or assign them to another node.
     63      </p><p>
     64        When this tunable is enabled, ctdb will no longer attempt
     65        to recover the cluster by failing IP addresses over to other
     66        nodes. This leads to a service outage until the administrator
     67        has manually performed IP failover to replacement nodes using the
     68        'ctdb moveip' command.
     69      </p></div><div class="refsect2"><a name="idp54048368"></a><h3>ElectionTimeout</h3><p>Default: 3</p><p>
     70        The number of seconds to wait for the election of recovery
     71        master to complete. If the election is not completed during this
     72        interval, then that round of election fails and ctdb starts a
     73        new election.
     74      </p></div><div class="refsect2"><a name="idp54050192"></a><h3>EnableBans</h3><p>Default: 1</p><p>
     75        This parameter allows ctdb to ban a node if the node is misbehaving.
     76      </p><p>
     77        When set to 0, this disables banning completely in the cluster
      78        and thus nodes can not get banned, even if they break. Don't
     79        set to 0 unless you know what you are doing.  You should set
     80        this to the same value on all nodes to avoid unexpected behaviour.
     81      </p></div><div class="refsect2"><a name="idp54052448"></a><h3>EventScriptTimeout</h3><p>Default: 30</p><p>
    6882        Maximum time in seconds to allow an event to run before timing
    6983        out.  This is the total time for all enabled scripts that are
     
    7488        success.  The logic here is that the callers of these events
    7589        implement their own additional timeout.
    76       </p></div><div class="refsect2"><a name="idp53900064"></a><h3>MonitorTimeoutCount</h3><p>Default: 20</p><p>
    77         How many monitor events in a row need to timeout before a node
    78         is flagged as UNHEALTHY.  This setting is useful if scripts
    79         can not be written so that they do not hang for benign
    80         reasons.
    81       </p></div><div class="refsect2"><a name="idp53901872"></a><h3>RecoveryGracePeriod</h3><p>Default: 120</p><p>
    82         During recoveries, if a node has not caused recovery failures during the
    83         last grace period, any records of transgressions that the node has caused
    84         recovery failures will be forgiven. This resets the ban-counter back to
    85         zero for that node.
    86       </p></div><div class="refsect2"><a name="idp49113200"></a><h3>RecoveryBanPeriod</h3><p>Default: 300</p><p>
    87         If a node becomes banned causing repetitive recovery failures. The node will
    88         eventually become banned from the cluster.
    89         This controls how long the culprit node will be banned from the cluster
    90         before it is allowed to try to join the cluster again.
    91         Don't set to small. A node gets banned for a reason and it is usually due
    92         to real problems with the node.
    93       </p></div><div class="refsect2"><a name="idp49115184"></a><h3>DatabaseHashSize</h3><p>Default: 100001</p><p>
    94         Size of the hash chains for the local store of the tdbs that ctdb manages.
    95       </p></div><div class="refsect2"><a name="idp49116784"></a><h3>DatabaseMaxDead</h3><p>Default: 5</p><p>
    96         How many dead records per hashchain in the TDB database do we allow before
    97         the freelist needs to be processed.
    98       </p></div><div class="refsect2"><a name="idp49118528"></a><h3>RerecoveryTimeout</h3><p>Default: 10</p><p>
    99         Once a recovery has completed, no additional recoveries are permitted
    100         until this timeout has expired.
    101       </p></div><div class="refsect2"><a name="idp49120256"></a><h3>EnableBans</h3><p>Default: 1</p><p>
    102         When set to 0, this disables BANNING completely in the cluster and thus
    103         nodes can not get banned, even it they break. Don't set to 0 unless you
    104         know what you are doing.  You should set this to the same value on
    105         all nodes to avoid unexpected behaviour.
    106       </p></div><div class="refsect2"><a name="idp49122128"></a><h3>DeterministicIPs</h3><p>Default: 0</p><p>
    107         When enabled, this tunable makes ctdb try to keep public IP addresses
    108         locked to specific nodes as far as possible. This makes it easier for
    109         debugging since you can know that as long as all nodes are healthy
    110         public IP X will always be hosted by node Y.
    111       </p><p>
    112         The cost of using deterministic IP address assignment is that it
    113         disables part of the logic where ctdb tries to reduce the number of
    114         public IP assignment changes in the cluster. This tunable may increase
    115         the number of IP failover/failbacks that are performed on the cluster
    116         by a small margin.
    117       </p></div><div class="refsect2"><a name="idp49124720"></a><h3>LCP2PublicIPs</h3><p>Default: 1</p><p>
    118         When enabled this switches ctdb to use the LCP2 ip allocation
    119         algorithm.
    120       </p></div><div class="refsect2"><a name="idp49126320"></a><h3>ReclockPingPeriod</h3><p>Default: x</p><p>
    121         Obsolete
    122       </p></div><div class="refsect2"><a name="idp49127952"></a><h3>NoIPFailback</h3><p>Default: 0</p><p>
    123         When set to 1, ctdb will not perform failback of IP addresses when a node
    124         becomes healthy. Ctdb WILL perform failover of public IP addresses when a
    125         node becomes UNHEALTHY, but when the node becomes HEALTHY again, ctdb
    126         will not fail the addresses back.
    127       </p><p>
    128         Use with caution! Normally when a node becomes available to the cluster
    129         ctdb will try to reassign public IP addresses onto the new node as a way
    130         to distribute the workload evenly across the clusternode. Ctdb tries to
    131         make sure that all running nodes have approximately the same number of
    132         public addresses it hosts.
    133       </p><p>
    134         When you enable this tunable, CTDB will no longer attempt to rebalance
    135         the cluster by failing IP addresses back to the new nodes. An unbalanced
    136         cluster will therefore remain unbalanced until there is manual
    137         intervention from the administrator. When this parameter is set, you can
    138         manually fail public IP addresses over to the new node(s) using the
    139         'ctdb moveip' command.
    140       </p></div><div class="refsect2"><a name="idp49136144"></a><h3>DisableIPFailover</h3><p>Default: 0</p><p>
    141         When enabled, ctdb will not perform failover or failback. Even if a
    142         node fails while holding public IPs, ctdb will not recover the IPs or
    143         assign them to another node.
    144       </p><p>
    145         When you enable this tunable, CTDB will no longer attempt to recover
    146         the cluster by failing IP addresses over to other nodes. This leads to
    147         a service outage until the administrator has manually performed failover
    148         to replacement nodes using the 'ctdb moveip' command.
    149       </p></div><div class="refsect2"><a name="idp49138608"></a><h3>NoIPTakeover</h3><p>Default: 0</p><p>
    150         When set to 1, ctdb will not allow IP addresses to be failed over
    151         onto this node. Any IP addresses that the node currently hosts
    152         will remain on the node but no new IP addresses can be failed over
    153         to the node.
    154       </p></div><div class="refsect2"><a name="idp49140448"></a><h3>NoIPHostOnAllDisabled</h3><p>Default: 0</p><p>
    155         If no nodes are healthy then by default ctdb will happily host
     90      </p></div><div class="refsect2"><a name="idp54054880"></a><h3>FetchCollapse</h3><p>Default: 1</p><p>
     91       This parameter is used to avoid multiple migration requests for
      92       the same record from a single node. All requests for
     93       the same record are queued up and processed when the record is
     94       migrated to the current node.
     95      </p><p>
     96        When many clients across many nodes try to access the same record
     97        at the same time this can lead to a fetch storm where the record
     98        becomes very active and bounces between nodes very fast. This
     99        leads to high CPU utilization of the ctdbd daemon, trying to
     100        bounce that record around very fast, and poor performance.
      101        Fetch collapsing can improve performance and reduce CPU utilization for
     102        certain workloads.
     103      </p></div><div class="refsect2"><a name="idp48966640"></a><h3>HopcountMakeSticky</h3><p>Default: 50</p><p>
     104        For database(s) marked STICKY (using 'ctdb setdbsticky'),
     105        any record that is migrating so fast that hopcount
      106        exceeds this limit is marked as a STICKY record for
      107        <code class="varname">StickyDuration</code> seconds. This means that
      108        after each migration the sticky record will be kept on the node for
      109        <code class="varname">StickyPindown</code> milliseconds and prevented from
     110        being migrated off the node.
     111       </p><p>
     112        This will improve performance for certain workloads, such as
     113        locking.tdb if many clients are opening/closing the same file
     114        concurrently.
     115      </p></div><div class="refsect2"><a name="idp48969952"></a><h3>KeepaliveInterval</h3><p>Default: 5</p><p>
     116        How often in seconds should the nodes send keep-alive packets to
     117        each other.
     118      </p></div><div class="refsect2"><a name="idp48971552"></a><h3>KeepaliveLimit</h3><p>Default: 5</p><p>
     119        After how many keepalive intervals without any traffic should
     120        a node wait until marking the peer as DISCONNECTED.
     121       </p><p>
     122        If a node has hung, it can take
     123        <code class="varname">KeepaliveInterval</code> *
     124        (<code class="varname">KeepaliveLimit</code> + 1) seconds before
     125        ctdb determines that the node is DISCONNECTED and performs
      126        a recovery. This limit should not be set too high, to enable
      127        early detection and to prevent application timeouts (e.g. SMB1)
      128        from kicking in before the failover is completed.
     129      </p></div><div class="refsect2"><a name="idp48974864"></a><h3>LCP2PublicIPs</h3><p>Default: 1</p><p>
     130        When set to 1, ctdb uses the LCP2 ip allocation algorithm.
     131      </p></div><div class="refsect2"><a name="idp48976464"></a><h3>LockProcessesPerDB</h3><p>Default: 200</p><p>
     132        This is the maximum number of lock helper processes ctdb will
     133        create for obtaining record locks.  When ctdb cannot get a record
     134        lock without blocking, it creates a helper process that waits
     135        for the lock to be obtained.
     136      </p></div><div class="refsect2"><a name="idp48978304"></a><h3>LogLatencyMs</h3><p>Default: 0</p><p>
      137        When set to non-zero, ctdb will log if certain operations
     138        take longer than this value, in milliseconds, to complete.
     139        These operations include "process a record request from client",
     140        "take a record or database lock", "update a persistent database
     141        record" and "vaccum a database".
     142      </p></div><div class="refsect2"><a name="idp48980208"></a><h3>MaxQueueDropMsg</h3><p>Default: 1000000</p><p>
     143        This is the maximum number of messages to be queued up for
     144        a client before ctdb will treat the client as hung and will
     145        terminate the client connection.
     146      </p></div><div class="refsect2"><a name="idp48981984"></a><h3>MonitorInterval</h3><p>Default: 15</p><p>
     147        How often should ctdb run the 'monitor' event in seconds to check
     148        for a node's health.
     149      </p></div><div class="refsect2"><a name="idp48988480"></a><h3>MonitorTimeoutCount</h3><p>Default: 20</p><p>
     150        How many 'monitor' events in a row need to timeout before a node
     151        is flagged as UNHEALTHY.  This setting is useful if scripts can
     152        not be written so that they do not hang for benign reasons.
     153      </p></div><div class="refsect2"><a name="idp48990288"></a><h3>NoIPFailback</h3><p>Default: 0</p><p>
     154        When set to 1, ctdb will not perform failback of IP addresses
     155        when a node becomes healthy. When a node becomes UNHEALTHY,
     156        ctdb WILL perform failover of public IP addresses, but when the
     157        node becomes HEALTHY again, ctdb will not fail the addresses back.
     158      </p><p>
     159        Use with caution! Normally when a node becomes available to the
     160        cluster ctdb will try to reassign public IP addresses onto the
     161        new node as a way to distribute the workload evenly across the
     162        clusternode. Ctdb tries to make sure that all running nodes have
     163        approximately the same number of public addresses it hosts.
     164      </p><p>
     165        When you enable this tunable, ctdb will no longer attempt to
     166        rebalance the cluster by failing IP addresses back to the new
     167        nodes. An unbalanced cluster will therefore remain unbalanced
     168        until there is manual intervention from the administrator. When
     169        this parameter is set, you can manually fail public IP addresses
     170        over to the new node(s) using the 'ctdb moveip' command.
     171      </p></div><div class="refsect2"><a name="idp48993680"></a><h3>NoIPHostOnAllDisabled</h3><p>Default: 0</p><p>
     172        If no nodes are HEALTHY then by default ctdb will happily host
    156173        public IPs on disabled (unhealthy or administratively disabled)
    157         nodes.  This can cause problems, for example if the underlying
     174        nodes.  This can cause problems, for example if the underlying
    158175        cluster filesystem is not mounted.  When set to 1 on a node and
    159         that node is disabled it, any IPs hosted by this node will be
     176        that node is disabled, any IPs hosted by this node will be
    160177        released and the node will not takeover any IPs until it is no
    161178        longer disabled.
    162       </p></div><div class="refsect2"><a name="idp49142480"></a><h3>DBRecordCountWarn</h3><p>Default: 100000</p><p>
    163         When set to non-zero, ctdb will log a warning when we try to recover a
    164         database with more than this many records. This will produce a warning
    165         if a database grows uncontrollably with orphaned records.
    166       </p></div><div class="refsect2"><a name="idp49144304"></a><h3>DBRecordSizeWarn</h3><p>Default: 10000000</p><p>
    167         When set to non-zero, ctdb will log a warning when we try to recover a
    168         database where a single record is bigger than this. This will produce
    169         a warning if a database record grows uncontrollably with orphaned
    170         sub-records.
    171       </p></div><div class="refsect2"><a name="idp49146144"></a><h3>DBSizeWarn</h3><p>Default: 1000000000</p><p>
    172         When set to non-zero, ctdb will log a warning when we try to recover a
    173         database bigger than this. This will produce
    174         a warning if a database grows uncontrollably.
    175       </p></div><div class="refsect2"><a name="idp49147936"></a><h3>VerboseMemoryNames</h3><p>Default: 0</p><p>
    176         This feature consumes additional memory. when used the talloc library
    177         will create more verbose names for all talloc allocated objects.
    178       </p></div><div class="refsect2"><a name="idp49149696"></a><h3>RecdPingTimeout</h3><p>Default: 60</p><p>
    179         If the main dameon has not heard a "ping" from the recovery dameon for
    180         this many seconds, the main dameon will log a message that the recovery
    181         daemon is potentially hung.
    182       </p></div><div class="refsect2"><a name="idp49151488"></a><h3>RecdFailCount</h3><p>Default: 10</p><p>
    183         If the recovery daemon has failed to ping the main dameon for this many
    184         consecutive intervals, the main daemon will consider the recovery daemon
    185         as hung and will try to restart it to recover.
    186       </p></div><div class="refsect2"><a name="idp49153312"></a><h3>LogLatencyMs</h3><p>Default: 0</p><p>
    187         When set to non-zero, this will make the main daemon log any operation that
    188         took longer than this value, in 'ms', to complete.
    189         These include "how long time a lockwait child process needed",
    190         "how long time to write to a persistent database" but also
    191         "how long did it take to get a response to a CALL from a remote node".
    192       </p></div><div class="refsect2"><a name="idp49155264"></a><h3>RecLockLatencyMs</h3><p>Default: 1000</p><p>
    193         When using a reclock file for split brain prevention, if set to non-zero
    194         this tunable will make the recovery dameon log a message if the fcntl()
    195         call to lock/testlock the recovery file takes longer than this number of
    196         ms.
    197       </p></div><div class="refsect2"><a name="idp49157120"></a><h3>RecoveryDropAllIPs</h3><p>Default: 120</p><p>
    198         If we have been stuck in recovery, or stopped, or banned, mode for
    199         this many seconds we will force drop all held public addresses.
    200       </p></div><div class="refsect2"><a name="idp55021168"></a><h3>VacuumInterval</h3><p>Default: 10</p><p>
     179      </p></div><div class="refsect2"><a name="idp48995696"></a><h3>NoIPTakeover</h3><p>Default: 0</p><p>
     180        When set to 1, ctdb will not allow IP addresses to be failed
     181        over onto this node. Any IP addresses that the node currently
     182        hosts will remain on the node but no new IP addresses can be
     183        failed over to the node.
     184      </p></div><div class="refsect2"><a name="idp48997536"></a><h3>PullDBPreallocation</h3><p>Default: 10*1024*1024</p><p>
      185        This is the size of the record buffer to pre-allocate for sending
      186        a reply to the PULLDB control. Usually the record buffer starts
      187        with the size of the first record and gets reallocated every time
      188        a new record is added to the record buffer. For a large number of
      189        records, growing the record buffer one record at a time can be
      190        very inefficient.
     191      </p></div><div class="refsect2"><a name="idp48999504"></a><h3>RecBufferSizeLimit</h3><p>Default: 1000000</p><p>
     192        This is the limit on the size of the record buffer to be sent
     193        in various controls.  This limit is used by new controls used
     194        for recovery and controls used in vacuuming.
     195      </p></div><div class="refsect2"><a name="idp49001328"></a><h3>RecdFailCount</h3><p>Default: 10</p><p>
      196        If the recovery daemon has failed to ping the main daemon for
     197        this many consecutive intervals, the main daemon will consider
     198        the recovery daemon as hung and will try to restart it to recover.
     199      </p></div><div class="refsect2"><a name="idp49003152"></a><h3>RecdPingTimeout</h3><p>Default: 60</p><p>
      200        If the main daemon has not heard a "ping" from the recovery daemon
      201        for this many seconds, the main daemon will log a message that
     202        the recovery daemon is potentially hung.  This also increments a
     203        counter which is checked against <code class="varname">RecdFailCount</code>
     204        for detection of hung recovery daemon.
     205      </p></div><div class="refsect2"><a name="idp49005424"></a><h3>RecLockLatencyMs</h3><p>Default: 1000</p><p>
     206        When using a reclock file for split brain prevention, if set
      207        to non-zero this tunable will make the recovery daemon log a
     208        message if the fcntl() call to lock/testlock the recovery file
     209        takes longer than this number of milliseconds.
     210      </p></div><div class="refsect2"><a name="idp49007280"></a><h3>RecoverInterval</h3><p>Default: 1</p><p>
     211        How frequently in seconds should the recovery daemon perform the
     212        consistency checks to determine if it should perform a recovery.
     213      </p></div><div class="refsect2"><a name="idp49009040"></a><h3>RecoverPDBBySeqNum</h3><p>Default: 1</p><p>
     214        When set to zero, database recovery for persistent databases is
     215        record-by-record and recovery process simply collects the most
     216        recent version of every individual record.
     217      </p><p>
     218        When set to non-zero, persistent databases will instead be
     219        recovered as a whole db and not by individual records. The
     220        node that contains the highest value stored in the record
     221        "__db_sequence_number__" is selected and the copy of that nodes
     222        database is used as the recovered database.
     223      </p><p>
      224        By default, recovery of persistent databases is done using the
      225        __db_sequence_number__ record.
     226      </p></div><div class="refsect2"><a name="idp54874960"></a><h3>RecoverTimeout</h3><p>Default: 120</p><p>
     227        This is the default setting for timeouts for controls when sent
     228        from the recovery daemon. We allow longer control timeouts from
     229        the recovery daemon than from normal use since the recovery
      230        daemon often uses controls that can take a lot longer than normal
     231        controls.
     232      </p></div><div class="refsect2"><a name="idp54876784"></a><h3>RecoveryBanPeriod</h3><p>Default: 300</p><p>
     233       The duration in seconds for which a node is banned if the node
     234       fails during recovery.  After this time has elapsed the node will
     235       automatically get unbanned and will attempt to rejoin the cluster.
     236      </p><p>
     237       A node usually gets banned due to real problems with the node.
     238       Don't set this value too small.  Otherwise, a problematic node
      239        will try to re-join the cluster too soon, causing unnecessary recoveries.
     240      </p></div><div class="refsect2"><a name="idp54879184"></a><h3>RecoveryDropAllIPs</h3><p>Default: 120</p><p>
     241        If a node is stuck in recovery, or stopped, or banned, for this
     242        many seconds, then ctdb will release all public addresses on
     243        that node.
     244      </p></div><div class="refsect2"><a name="idp54880880"></a><h3>RecoveryGracePeriod</h3><p>Default: 120</p><p>
     245       During recoveries, if a node has not caused recovery failures
      246       during the last grace period in seconds, any record of
      247       transgressions that caused recovery failures on that node will be
     248       forgiven. This resets the ban-counter back to zero for that node.
     249      </p></div><div class="refsect2"><a name="idp54882720"></a><h3>RepackLimit</h3><p>Default: 10000</p><p>
      250        During vacuuming, if the number of freelist records is more than
     251        <code class="varname">RepackLimit</code>, then the database is repacked
     252        to get rid of the freelist records to avoid fragmentation.
     253      </p><p>
     254        Databases are repacked only if both <code class="varname">RepackLimit</code>
     255        and <code class="varname">VacuumLimit</code> are exceeded.
     256      </p></div><div class="refsect2"><a name="idp54885920"></a><h3>RerecoveryTimeout</h3><p>Default: 10</p><p>
     257        Once a recovery has completed, no additional recoveries are
     258        permitted until this timeout in seconds has expired.
     259      </p></div><div class="refsect2"><a name="idp54887600"></a><h3>Samba3AvoidDeadlocks</h3><p>Default: 0</p><p>
     260        If set to non-zero, enable code that prevents deadlocks with Samba
     261        (only for Samba 3.x).
     262      </p><p>
     263        This should be set to 1 only when using Samba version 3.x
     264        to enable special code in ctdb to avoid deadlock with Samba
     265        version 3.x.  This code is not required for Samba version 4.x
     266        and must not be enabled for Samba 4.x.
     267      </p></div><div class="refsect2"><a name="idp54889888"></a><h3>SeqnumInterval</h3><p>Default: 1000</p><p>
     268        Some databases have seqnum tracking enabled, so that samba will
      269        be able to detect asynchronously when there have been updates
      270        to the database.  Every time a database is updated its sequence
     271        number is increased.
     272      </p><p>
     273        This tunable is used to specify in milliseconds how frequently
     274        ctdb will send out updates to remote nodes to inform them that
     275        the sequence number is increased.
     276      </p></div><div class="refsect2"><a name="idp54892240"></a><h3>StatHistoryInterval</h3><p>Default: 1</p><p>
     277        Granularity of the statistics collected in the statistics
     278        history. This is reported by 'ctdb stats' command.
     279      </p></div><div class="refsect2"><a name="idp54893904"></a><h3>StickyDuration</h3><p>Default: 600</p><p>
     280        Once a record has been marked STICKY, this is the duration in
      281        seconds for which the record will be flagged as a STICKY record.
     282      </p></div><div class="refsect2"><a name="idp54895584"></a><h3>StickyPindown</h3><p>Default: 200</p><p>
     283        Once a STICKY record has been migrated onto a node, it will be
     284        pinned down on that node for this number of milliseconds. Any
     285        request from other nodes to migrate the record off the node will
     286        be deferred.
     287      </p></div><div class="refsect2"><a name="idp54897344"></a><h3>TakeoverTimeout</h3><p>Default: 9</p><p>
     288        This is the duration in seconds in which ctdb tries to complete IP
     289        failover.
     290      </p></div><div class="refsect2"><a name="idp54898880"></a><h3>TDBMutexEnabled</h3><p>Default: 0</p><p>
      291        This parameter enables the TDB_MUTEX_LOCKING feature on volatile
      292        databases if robust mutexes are supported. This optimizes record
      293        locking using robust mutexes and is much more efficient than
      294        using posix locks.
     295      </p></div><div class="refsect2"><a name="idp54900656"></a><h3>TickleUpdateInterval</h3><p>Default: 20</p><p>
     296        Every <code class="varname">TickleUpdateInterval</code> seconds, ctdb
     297        synchronizes the client connection information across nodes.
     298      </p></div><div class="refsect2"><a name="idp54902576"></a><h3>TraverseTimeout</h3><p>Default: 20</p><p>
     299        This is the duration in seconds for which a database traverse
     300        is allowed to run.  If the traverse does not complete during
     301        this interval, ctdb will abort the traverse.
     302      </p></div><div class="refsect2"><a name="idp54904304"></a><h3>VacuumFastPathCount</h3><p>Default: 60</p><p>
     303       During a vacuuming run, ctdb usually processes only the records
      304       marked for deletion, also called fast path vacuuming. After
      305       finishing <code class="varname">VacuumFastPathCount</code> fast
      306       path vacuuming runs, ctdb will trigger a scan of the complete database
     307       for any empty records that need to be deleted.
     308      </p></div><div class="refsect2"><a name="idp54906560"></a><h3>VacuumInterval</h3><p>Default: 10</p><p>
    201309        Periodic interval in seconds when vacuuming is triggered for
    202310        volatile databases.
    203       </p></div><div class="refsect2"><a name="idp55022832"></a><h3>VacuumMaxRunTime</h3><p>Default: 120</p><p>
     311      </p></div><div class="refsect2"><a name="idp54908224"></a><h3>VacuumLimit</h3><p>Default: 5000</p><p>
      312        During vacuuming, if the number of deleted records is more than
     313        <code class="varname">VacuumLimit</code>, then databases are repacked to
     314        avoid fragmentation.
     315      </p><p>
     316        Databases are repacked only if both <code class="varname">RepackLimit</code>
     317        and <code class="varname">VacuumLimit</code> are exceeded.
     318      </p></div><div class="refsect2"><a name="idp54911392"></a><h3>VacuumMaxRunTime</h3><p>Default: 120</p><p>
    204319        The maximum time in seconds for which the vacuuming process is
    205320        allowed to run.  If vacuuming process takes longer than this
    206321        value, then the vacuuming process is terminated.
    207       </p></div><div class="refsect2"><a name="idp55024592"></a><h3>RepackLimit</h3><p>Default: 10000</p><p>
    208         During vacuuming, if the number of freelist records are more
    209         than <code class="varname">RepackLimit</code>, then databases are
    210         repacked to get rid of the freelist records to avoid
    211         fragmentation.
    212       </p><p>
    213         Databases are repacked only if both
    214         <code class="varname">RepackLimit</code> and
    215         <code class="varname">VacuumLimit</code> are exceeded.
    216       </p></div><div class="refsect2"><a name="idp55027792"></a><h3>VacuumLimit</h3><p>Default: 5000</p><p>
    217         During vacuuming, if the number of deleted records are more
    218         than <code class="varname">VacuumLimit</code>, then databases are
    219         repacked to avoid fragmentation.
    220       </p><p>
    221         Databases are repacked only if both
    222         <code class="varname">RepackLimit</code> and
    223         <code class="varname">VacuumLimit</code> are exceeded.
    224       </p></div><div class="refsect2"><a name="idp55030864"></a><h3>VacuumFastPathCount</h3><p>Default: 60</p><p>
    225         When a record is deleted, it is marked for deletion during
    226         vacuuming.  Vacuuming process usually processes this list to purge
    227         the records from the database.  If the number of records marked
    228         for deletion are more than VacuumFastPathCount, then vacuuming
    229         process will scan the complete database for empty records instead
    230         of using the list of records marked for deletion.
    231       </p></div><div class="refsect2"><a name="idp55032832"></a><h3>DeferredAttachTO</h3><p>Default: 120</p><p>
    232         When databases are frozen we do not allow clients to attach to the
    233         databases. Instead of returning an error immediately to the application
    234         the attach request from the client is deferred until the database
    235         becomes available again at which stage we respond to the client.
    236       </p><p>
    237         This timeout controls how long we will defer the request from the client
    238         before timing it out and returning an error to the client.
    239       </p></div><div class="refsect2"><a name="idp55035216"></a><h3>HopcountMakeSticky</h3><p>Default: 50</p><p>
    240         If the database is set to 'STICKY' mode, using the 'ctdb setdbsticky'
    241         command, any record that is seen as very hot and migrating so fast that
    242         hopcount surpasses 50 is set to become a STICKY record for StickyDuration
    243         seconds. This means that after each migration the record will be kept on
    244         the node and prevented from being migrated off the node.
    245       </p><p>
    246         This setting allows one to try to identify such records and stop them from
    247         migrating across the cluster so fast. This will improve performance for
    248         certain workloads, such as locking.tdb if many clients are opening/closing
    249         the same file concurrently.
    250       </p></div><div class="refsect2"><a name="idp55037776"></a><h3>StickyDuration</h3><p>Default: 600</p><p>
    251         Once a record has been found to be fetch-lock hot and has been flagged to
    252         become STICKY, this is for how long, in seconds, the record will be
    253         flagged as a STICKY record.
    254       </p></div><div class="refsect2"><a name="idp55039504"></a><h3>StickyPindown</h3><p>Default: 200</p><p>
    255         Once a STICKY record has been migrated onto a node, it will be pinned down
    256         on that node for this number of ms. Any request from other nodes to migrate
    257         the record off the node will be deferred until the pindown timer expires.
    258       </p></div><div class="refsect2"><a name="idp55041296"></a><h3>StatHistoryInterval</h3><p>Default: 1</p><p>
    259         Granularity of the statistics collected in the statistics history.
    260       </p></div><div class="refsect2"><a name="idp55042928"></a><h3>AllowClientDBAttach</h3><p>Default: 1</p><p>
    261         When set to 0, clients are not allowed to attach to any databases.
    262         This can be used to temporarily block any new processes from attaching
    263         to and accessing the databases.
    264       </p></div><div class="refsect2"><a name="idp55044656"></a><h3>RecoverPDBBySeqNum</h3><p>Default: 1</p><p>
    265         When set to zero, database recovery for persistent databases
    266         is record-by-record and recovery process simply collects the
    267         most recent version of every individual record.
    268       </p><p>
    269         When set to non-zero, persistent databases will instead be
    270         recovered as a whole db and not by individual records. The
    271         node that contains the highest value stored in the record
    272         "__db_sequence_number__" is selected and the copy of that
    273         nodes database is used as the recovered database.
    274       </p><p>
    275         By default, recovery of persistent databses is done using
    276         __db_sequence_number__ record.
    277       </p></div><div class="refsect2"><a name="idp55047584"></a><h3>FetchCollapse</h3><p>Default: 1</p><p>
    278         When many clients across many nodes try to access the same record at the
    279         same time this can lead to a fetch storm where the record becomes very
    280         active and bounces between nodes very fast. This leads to high CPU
    281         utilization of the ctdbd daemon, trying to bounce that record around
    282         very fast, and poor performance.
    283       </p><p>
    284         This parameter is used to activate a fetch-collapse. A fetch-collapse
    285         is when we track which records we have requests in flight so that we only
    286         keep one request in flight from a certain node, even if multiple smbd
    287         processes are attemtping to fetch the record at the same time. This
    288         can improve performance and reduce CPU utilization for certain
    289         workloads.
    290       </p><p>
    291         This timeout controls if we should collapse multiple fetch operations
    292         of the same record into a single request and defer all duplicates or not.
    293       </p></div><div class="refsect2"><a name="idp55050784"></a><h3>Samba3AvoidDeadlocks</h3><p>Default: 0</p><p>
    294         Enable code that prevents deadlocks with Samba (only for Samba 3.x).
    295       </p><p>
    296         This should be set to 1 when using Samba version 3.x to enable special
    297         code in CTDB to avoid deadlock with Samba version 3.x.  This code
    298         is not required for Samba version 4.x and must not be enabled for
    299         Samba 4.x.
    300       </p></div></div><div class="refsect1"><a name="idp55053168"></a><h2>SEE ALSO</h2><p>
     322      </p></div><div class="refsect2"><a name="idp54913152"></a><h3>VerboseMemoryNames</h3><p>Default: 0</p><p>
     323        When set to non-zero, ctdb assigns verbose names for some of
     324        the talloc allocated memory objects.  These names are visible
     325        in the talloc memory report generated by 'ctdb dumpmemory'.
     326      </p></div></div><div class="refsect1"><a name="idp54915024"></a><h2>SEE ALSO</h2><p>
    301327      <span class="citerefentry"><span class="refentrytitle">ctdb</span>(1)</span>,
    302328
  • vendor/current/ctdb/doc/ctdb-tunables.7.xml

    r988 r989  
    3030    </para>
    3131
    32     <refsect2>
    33       <title>MaxRedirectCount</title>
    34       <para>Default: 3</para>
    35       <para>
    36         If we are not the DMASTER and need to fetch a record across the network
    37         we first send the request to the LMASTER after which the record
    38         is passed onto the current DMASTER. If the DMASTER changes before
    39         the request has reached that node, the request will be passed onto the
    40         "next" DMASTER. For very hot records that migrate rapidly across the
    41         cluster this can cause a request to "chase" the record for many hops
    42         before it catches up with the record.
    43 
    44         this is how many hops we allow trying to chase the DMASTER before we
    45         switch back to the LMASTER again to ask for new directions.
    46       </para>
    47       <para>
    48         When chasing a record, this is how many hops we will chase the record
    49         for before going back to the LMASTER to ask for new guidance.
    50       </para>
    51     </refsect2>
    52 
    53     <refsect2>
    54       <title>SeqnumInterval</title>
    55       <para>Default: 1000</para>
    56       <para>
    57         Some databases have seqnum tracking enabled, so that samba will be able
    58         to detect asynchronously when there has been updates to the database.
    59         Everytime a database is updated its sequence number is increased.
    60       </para>
    61       <para>
    62         This tunable is used to specify in 'ms' how frequently ctdb will
    63         send out updates to remote nodes to inform them that the sequence
    64         number is increased.
     32    <para>
     33      The tunable variables are listed alphabetically.
     34    </para>
     35
     36    <refsect2>
     37      <title>AllowClientDBAttach</title>
     38      <para>Default: 1</para>
     39      <para>
     40        When set to 0, clients are not allowed to attach to any databases.
     41        This can be used to temporarily block any new processes from
     42        attaching to and accessing the databases.  This is mainly used
     43        for detaching a volatile database using 'ctdb detach'.
     44      </para>
     45    </refsect2>
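A hedged sketch of the detach workflow referred to above (the database name is hypothetical):

    ctdb setvar AllowClientDBAttach 0   # block new client attachments
    ctdb detach test.tdb                # detach the volatile database
    ctdb setvar AllowClientDBAttach 1   # allow attachments again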
     46
     47    <refsect2>
     48      <title>AllowUnhealthyDBRead</title>
     49      <para>Default: 0</para>
     50      <para>
     51        When set to 1, ctdb allows database traverses to read unhealthy
     52        databases.  By default, ctdb does not allow reading records from
     53        unhealthy databases.
    6554      </para>
    6655    </refsect2>
     
    7059      <para>Default: 60</para>
    7160      <para>
    72         This is the default
    73         setting for timeout for when sending a control message to either the
    74         local or a remote ctdb daemon.
    75       </para>
    76     </refsect2>
    77 
    78     <refsect2>
    79       <title>TraverseTimeout</title>
    80       <para>Default: 20</para>
    81       <para>
    82         This setting controls how long we allow a traverse process to run.
    83         After this timeout triggers, the main ctdb daemon will abort the
    84         traverse if it has not yet finished.
    85       </para>
    86     </refsect2>
    87 
    88     <refsect2>
    89       <title>KeepaliveInterval</title>
     61        This is the default setting for timeout for when sending a
     62        control message to either the local or a remote ctdb daemon.
     63      </para>
     64    </refsect2>
     65
     66    <refsect2>
     67      <title>DatabaseHashSize</title>
     68      <para>Default: 100001</para>
     69      <para>
     70        Number of the hash chains for the local store of the tdbs that
     71        ctdb manages.
     72      </para>
     73    </refsect2>
     74
     75    <refsect2>
     76      <title>DatabaseMaxDead</title>
    9077      <para>Default: 5</para>
    9178      <para>
    92         How often in seconds should the nodes send keepalives to eachother.
    93       </para>
    94     </refsect2>
    95 
    96     <refsect2>
    97       <title>KeepaliveLimit</title>
    98       <para>Default: 5</para>
    99       <para>
    100         After how many keepalive intervals without any traffic should a node
    101         wait until marking the peer as DISCONNECTED.
    102       </para>
    103       <para>
    104         If a node has hung, it can thus take KeepaliveInterval*(KeepaliveLimit+1)
    105         seconds before we determine that the node is DISCONNECTED and that we
    106         require a recovery. This limitshould not be set too high since we want
    107         a hung node to be detectec, and expunged from the cluster well before
    108         common CIFS timeouts (45-90 seconds) kick in.
    109       </para>
    110     </refsect2>
    111 
    112     <refsect2>
    113       <title>RecoverTimeout</title>
    114       <para>Default: 20</para>
    115       <para>
    116         This is the default setting for timeouts for controls when sent from the
    117         recovery daemon. We allow longer control timeouts from the recovery daemon
    118         than from normal use since the recovery dameon often use controls that
    119         can take a lot longer than normal controls.
    120       </para>
    121     </refsect2>
    122 
    123     <refsect2>
    124       <title>RecoverInterval</title>
    125       <para>Default: 1</para>
    126       <para>
    127         How frequently in seconds should the recovery daemon perform the
    128         consistency checks that determine if we need to perform a recovery or not.
      79        Maximum number of dead records per hash chain for the tdb databases
     80        managed by ctdb.
     81      </para>
     82    </refsect2>
     83
     84    <refsect2>
     85      <title>DBRecordCountWarn</title>
     86      <para>Default: 100000</para>
     87      <para>
     88        When set to non-zero, ctdb will log a warning during recovery if
     89        a database has more than this many records. This will produce a
     90        warning if a database grows uncontrollably with orphaned records.
     91      </para>
     92    </refsect2>
     93
     94    <refsect2>
     95      <title>DBRecordSizeWarn</title>
     96      <para>Default: 10000000</para>
     97      <para>
     98        When set to non-zero, ctdb will log a warning during recovery
     99        if a single record is bigger than this size. This will produce
     100        a warning if a database record grows uncontrollably.
     101      </para>
     102    </refsect2>
     103
     104    <refsect2>
     105      <title>DBSizeWarn</title>
     106      <para>Default: 1000000000</para>
     107      <para>
     108        When set to non-zero, ctdb will log a warning during recovery if
     109        a database size is bigger than this. This will produce a warning
     110        if a database grows uncontrollably.
     111      </para>
     112    </refsect2>
     113
     114    <refsect2>
     115      <title>DeferredAttachTO</title>
     116      <para>Default: 120</para>
     117      <para>
     118        When databases are frozen we do not allow clients to attach to
     119        the databases. Instead of returning an error immediately to the
     120        client, the attach request from the client is deferred until
     121        the database becomes available again at which stage we respond
     122        to the client.
     123      </para>
     124      <para>
     125        This timeout controls how long we will defer the request from the
     126        client before timing it out and returning an error to the client.
     127      </para>
     128    </refsect2>
     129
     130    <refsect2>
     131      <title>DeterministicIPs</title>
     132      <para>Default: 0</para>
     133      <para>
     134        When set to 1, ctdb will try to keep public IP addresses locked
     135        to specific nodes as far as possible. This makes it easier
     136        for debugging since you can know that as long as all nodes are
     137        healthy public IP X will always be hosted by node Y.
     138      </para>
     139      <para>
     140        The cost of using deterministic IP address assignment is that it
     141        disables part of the logic where ctdb tries to reduce the number
     142        of public IP assignment changes in the cluster. This tunable may
     143        increase the number of IP failover/failbacks that are performed
     144        on the cluster by a small margin.
     145      </para>
     146    </refsect2>
     147
     148    <refsect2>
     149      <title>DisableIPFailover</title>
     150      <para>Default: 0</para>
     151      <para>
     152        When set to non-zero, ctdb will not perform failover or
     153        failback. Even if a node fails while holding public IPs, ctdb
     154        will not recover the IPs or assign them to another node.
     155      </para>
     156      <para>
     157        When this tunable is enabled, ctdb will no longer attempt
     158        to recover the cluster by failing IP addresses over to other
     159        nodes. This leads to a service outage until the administrator
     160        has manually performed IP failover to replacement nodes using the
     161        'ctdb moveip' command.
    129162      </para>
    130163    </refsect2>
     
    134167      <para>Default: 3</para>
    135168      <para>
    136         When electing a new recovery master, this is how many seconds we allow
    137         the election to take before we either deem the election finished
    138         or we fail the election and start a new one.
    139       </para>
    140     </refsect2>
    141 
    142     <refsect2>
    143       <title>TakeoverTimeout</title>
    144       <para>Default: 9</para>
    145       <para>
    146         This is how many seconds we allow controls to take for IP failover events.
    147       </para>
    148     </refsect2>
    149 
    150     <refsect2>
    151       <title>MonitorInterval</title>
    152       <para>Default: 15</para>
    153       <para>
     154         How often should ctdb run the event scripts to check for a node's health.
    155       </para>
    156     </refsect2>
    157 
    158     <refsect2>
    159       <title>TickleUpdateInterval</title>
    160       <para>Default: 20</para>
    161       <para>
    162         How often will ctdb record and store the "tickle" information used to
    163         kickstart stalled tcp connections after a recovery.
      169        The number of seconds to wait for the election of a recovery
      170        master to complete. If the election is not completed during this
     171        interval, then that round of election fails and ctdb starts a
     172        new election.
     173      </para>
     174    </refsect2>
     175
     176    <refsect2>
     177      <title>EnableBans</title>
     178      <para>Default: 1</para>
     179      <para>
     180        This parameter allows ctdb to ban a node if the node is misbehaving.
     181      </para>
     182      <para>
     183        When set to 0, this disables banning completely in the cluster
      184        and thus nodes can not get banned, even if they break. Don't
     185        set to 0 unless you know what you are doing.  You should set
     186        this to the same value on all nodes to avoid unexpected behaviour.
    164187      </para>
    165188    </refsect2>
     
    173196        run for an event, not just a single event script.
    174197      </para>
    175 
    176198      <para>
    177199        Note that timeouts are ignored for some events ("takeip",
     
    183205
    184206    <refsect2>
     207      <title>FetchCollapse</title>
     208      <para>Default: 1</para>
     209      <para>
     210       This parameter is used to avoid multiple migration requests for
     211       the same record from a single node. All the record requests for
     212       the same record are queued up and processed when the record is
     213       migrated to the current node.
     214      </para>
     215      <para>
     216        When many clients across many nodes try to access the same record
     217        at the same time this can lead to a fetch storm where the record
     218        becomes very active and bounces between nodes very fast. This
     219        leads to high CPU utilization of the ctdbd daemon, trying to
     220        bounce that record around very fast, and poor performance.
      221        Collapsing such fetches can improve performance and reduce
      222        CPU utilization for certain workloads.
     223      </para>
     224    </refsect2>
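      A minimal C sketch of the collapsing idea described above, assuming a flat
      in-flight table; all names here (pending_fetch, fetch_record, MAX_PENDING)
      are invented for illustration and are not ctdb source:

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    #define MAX_PENDING 128

    struct pending_fetch {
            bool in_flight;
            char key[64];
            int num_waiters;        /* deferred duplicate requests */
    };

    static struct pending_fetch pending[MAX_PENDING];

    /* Returns true if a new migration request must go on the wire. */
    static bool fetch_record(const char *key)
    {
            int i, free_slot = -1;

            for (i = 0; i < MAX_PENDING; i++) {
                    if (pending[i].in_flight &&
                        strcmp(pending[i].key, key) == 0) {
                            pending[i].num_waiters++;   /* collapse duplicate */
                            return false;
                    }
                    if (!pending[i].in_flight && free_slot < 0) {
                            free_slot = i;
                    }
            }
            if (free_slot >= 0) {
                    pending[free_slot].in_flight = true;
                    snprintf(pending[free_slot].key,
                             sizeof(pending[free_slot].key), "%s", key);
                    pending[free_slot].num_waiters = 0;
            }
            return true;    /* first request for this key */
    }

      Keeping a single request in flight per key is what turns a fetch storm
      into one migration plus a local queue drain.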
     225
     226    <refsect2>
     227      <title>HopcountMakeSticky</title>
     228      <para>Default: 50</para>
     229      <para>
     230        For database(s) marked STICKY (using 'ctdb setdbsticky'),
     231        any record that is migrating so fast that hopcount
     232        exceeds this limit is marked as STICKY record for
      233        exceeds this limit is marked as a STICKY record for
      234        <varname>StickyDuration</varname> seconds. This means that
      235        after each migration the sticky record will be kept on the node
      236        for <varname>StickyPindown</varname> milliseconds and prevented from
     237       </para>
     238       <para>
     239        This will improve performance for certain workloads, such as
     240        locking.tdb if many clients are opening/closing the same file
     241        concurrently.
     242      </para>
     243    </refsect2>
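      A rough C sketch of the rule the paragraphs above describe, using the
      default values; record_state, record_is_sticky and db_sticky are invented
      names for illustration, not ctdb source:

    #include <stdbool.h>
    #include <time.h>

    #define HOPCOUNT_MAKE_STICKY 50   /* HopcountMakeSticky */
    #define STICKY_DURATION      600  /* StickyDuration (seconds) */

    struct record_state {
            int hopcount;
            time_t sticky_until;      /* 0 when the record is not sticky */
    };

    static bool record_is_sticky(struct record_state *r, bool db_sticky)
    {
            if (!db_sticky) {
                    return false;     /* only databases marked STICKY qualify */
            }
            if (r->hopcount > HOPCOUNT_MAKE_STICKY) {
                    r->sticky_until = time(NULL) + STICKY_DURATION;
            }
            return time(NULL) < r->sticky_until;
    }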
     244
     245    <refsect2>
     246      <title>KeepaliveInterval</title>
     247      <para>Default: 5</para>
     248      <para>
     249        How often in seconds should the nodes send keep-alive packets to
     250        each other.
     251      </para>
     252    </refsect2>
     253
     254    <refsect2>
     255      <title>KeepaliveLimit</title>
     256      <para>Default: 5</para>
     257      <para>
      258        The number of keepalive intervals without any traffic after
      259        which a node marks the peer as DISCONNECTED.
     260       </para>
     261       <para>
     262        If a node has hung, it can take
     263        <varname>KeepaliveInterval</varname> *
     264        (<varname>KeepaliveLimit</varname> + 1) seconds before
     265        ctdb determines that the node is DISCONNECTED and performs
      266        a recovery. This limit should not be set too high, so that a
      267        hung node is detected early and application timeouts (e.g. SMB1)
      268        do not kick in before the failover is completed.
     269      </para>
     270    </refsect2>
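      A small self-contained example of the worst-case detection arithmetic
      from the paragraph above, plugged with the defaults:

    #include <stdio.h>

    int main(void)
    {
            int keepalive_interval = 5;  /* KeepaliveInterval (seconds) */
            int keepalive_limit = 5;     /* KeepaliveLimit */

            /* KeepaliveInterval * (KeepaliveLimit + 1), as described above */
            printf("worst-case detection: %d seconds\n",
                   keepalive_interval * (keepalive_limit + 1));  /* 30 */
            return 0;
    }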
     271
     272    <refsect2>
     273      <title>LCP2PublicIPs</title>
     274      <para>Default: 1</para>
     275      <para>
      276        When set to 1, ctdb uses the LCP2 IP allocation algorithm.
     277      </para>
     278    </refsect2>
     279
     280    <refsect2>
     281      <title>LockProcessesPerDB</title>
     282      <para>Default: 200</para>
     283      <para>
     284        This is the maximum number of lock helper processes ctdb will
     285        create for obtaining record locks.  When ctdb cannot get a record
     286        lock without blocking, it creates a helper process that waits
     287        for the lock to be obtained.
     288      </para>
     289    </refsect2>
     290
     291    <refsect2>
     292      <title>LogLatencyMs</title>
     293      <para>Default: 0</para>
     294      <para>
      295        When set to non-zero, ctdb will log if certain operations
     296        take longer than this value, in milliseconds, to complete.
     297        These operations include "process a record request from client",
     298        "take a record or database lock", "update a persistent database
      299        record" and "vacuum a database".
     300      </para>
     301    </refsect2>
     302
     303    <refsect2>
     304      <title>MaxQueueDropMsg</title>
     305      <para>Default: 1000000</para>
     306      <para>
     307        This is the maximum number of messages to be queued up for
     308        a client before ctdb will treat the client as hung and will
     309        terminate the client connection.
     310      </para>
     311    </refsect2>
     312
     313    <refsect2>
     314      <title>MonitorInterval</title>
     315      <para>Default: 15</para>
     316      <para>
      317        How often, in seconds, should ctdb run the 'monitor' event
      318        to check a node's health.
     319      </para>
     320    </refsect2>
     321
     322    <refsect2>
    185323      <title>MonitorTimeoutCount</title>
    186324      <para>Default: 20</para>
    187325      <para>
    188         How many monitor events in a row need to timeout before a node
    189         is flagged as UNHEALTHY.  This setting is useful if scripts
    190         can not be written so that they do not hang for benign
    191         reasons.
     326        How many 'monitor' events in a row need to timeout before a node
      327        is flagged as UNHEALTHY.  This setting is useful if scripts
      328        cannot be written so that they do not hang for benign reasons.
     329      </para>
     330    </refsect2>
     331
     332    <refsect2>
     333      <title>NoIPFailback</title>
     334      <para>Default: 0</para>
     335      <para>
     336        When set to 1, ctdb will not perform failback of IP addresses
     337        when a node becomes healthy. When a node becomes UNHEALTHY,
     338        ctdb WILL perform failover of public IP addresses, but when the
     339        node becomes HEALTHY again, ctdb will not fail the addresses back.
     340      </para>
     341      <para>
     342        Use with caution! Normally when a node becomes available to the
     343        cluster ctdb will try to reassign public IP addresses onto the
     344        new node as a way to distribute the workload evenly across the
      345        cluster nodes. ctdb tries to make sure that all running nodes
      346        host approximately the same number of public addresses.
     347      </para>
     348      <para>
     349        When you enable this tunable, ctdb will no longer attempt to
     350        rebalance the cluster by failing IP addresses back to the new
     351        nodes. An unbalanced cluster will therefore remain unbalanced
     352        until there is manual intervention from the administrator. When
     353        this parameter is set, you can manually fail public IP addresses
     354        over to the new node(s) using the 'ctdb moveip' command.
     355      </para>
     356    </refsect2>
     357
     358    <refsect2>
     359      <title>NoIPHostOnAllDisabled</title>
     360      <para>Default: 0</para>
     361      <para>
     362        If no nodes are HEALTHY then by default ctdb will happily host
     363        public IPs on disabled (unhealthy or administratively disabled)
     364        nodes.  This can cause problems, for example if the underlying
     365        cluster filesystem is not mounted.  When set to 1 on a node and
     366        that node is disabled, any IPs hosted by this node will be
      367        released and the node will not take over any IPs until it is no
     368        longer disabled.
     369      </para>
     370    </refsect2>
     371
     372    <refsect2>
     373      <title>NoIPTakeover</title>
     374      <para>Default: 0</para>
     375      <para>
     376        When set to 1, ctdb will not allow IP addresses to be failed
     377        over onto this node. Any IP addresses that the node currently
     378        hosts will remain on the node but no new IP addresses can be
     379        failed over to the node.
     380      </para>
     381    </refsect2>
     382
     383    <refsect2>
     384      <title>PullDBPreallocation</title>
     385      <para>Default: 10*1024*1024</para>
     386      <para>
      387        This is the size of the record buffer to pre-allocate when
      388        sending a reply to the PULLDB control. Usually the record buffer
      389        starts with the size of the first record and gets reallocated
      390        every time a new record is added. For a large number of records,
      391        growing the record buffer one record at a time can be very
      392        inefficient.
     393      </para>
     394    </refsect2>
     395
     396    <refsect2>
     397      <title>RecBufferSizeLimit</title>
     398      <para>Default: 1000000</para>
     399      <para>
     400        This is the limit on the size of the record buffer to be sent
     401        in various controls.  This limit is used by new controls used
     402        for recovery and controls used in vacuuming.
     403      </para>
     404    </refsect2>
     405
     406    <refsect2>
     407      <title>RecdFailCount</title>
     408      <para>Default: 10</para>
     409      <para>
      410        If the recovery daemon has failed to ping the main daemon for
     411        this many consecutive intervals, the main daemon will consider
     412        the recovery daemon as hung and will try to restart it to recover.
     413      </para>
     414    </refsect2>
     415
     416    <refsect2>
     417      <title>RecdPingTimeout</title>
     418      <para>Default: 60</para>
     419      <para>
      420        If the main daemon has not heard a "ping" from the recovery
      421        daemon for this many seconds, the main daemon will log a message that
     422        the recovery daemon is potentially hung.  This also increments a
     423        counter which is checked against <varname>RecdFailCount</varname>
      424        for detection of a hung recovery daemon.
     425      </para>
     426    </refsect2>
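      An illustrative C sketch combining RecdPingTimeout and RecdFailCount as
      described above; missed_pings, recd_ping_timed_out and
      restart_recovery_daemon are invented names, not ctdb source:

    #include <stdio.h>

    #define RECD_PING_TIMEOUT 60  /* RecdPingTimeout (seconds) */
    #define RECD_FAIL_COUNT   10  /* RecdFailCount */

    static int missed_pings;

    static void restart_recovery_daemon(void)
    {
            /* restart logic elided */
    }

    /* Called each time RecdPingTimeout expires without a ping. */
    static void recd_ping_timed_out(void)
    {
            missed_pings++;
            fprintf(stderr, "recovery daemon potentially hung (%d misses)\n",
                    missed_pings);
            if (missed_pings >= RECD_FAIL_COUNT) {
                    /* roughly 600 s of silence with the defaults above */
                    restart_recovery_daemon();
            }
    }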
     427
     428    <refsect2>
     429      <title>RecLockLatencyMs</title>
     430      <para>Default: 1000</para>
     431      <para>
     432        When using a reclock file for split brain prevention, if set
      433        to non-zero this tunable will make the recovery daemon log a
     434        message if the fcntl() call to lock/testlock the recovery file
     435        takes longer than this number of milliseconds.
     436      </para>
     437    </refsect2>
     438
     439    <refsect2>
     440      <title>RecoverInterval</title>
     441      <para>Default: 1</para>
     442      <para>
     443        How frequently in seconds should the recovery daemon perform the
     444        consistency checks to determine if it should perform a recovery.
     445      </para>
     446    </refsect2>
     447
     448    <refsect2>
     449      <title>RecoverPDBBySeqNum</title>
     450      <para>Default: 1</para>
     451      <para>
     452        When set to zero, database recovery for persistent databases is
      453        record-by-record and the recovery process simply collects the most
     454        recent version of every individual record.
     455      </para>
     456      <para>
     457        When set to non-zero, persistent databases will instead be
     458        recovered as a whole db and not by individual records. The
     459        node that contains the highest value stored in the record
      460        "__db_sequence_number__" is selected and the copy of that node's
     461        database is used as the recovered database.
     462      </para>
     463      <para>
      464        By default, recovery of persistent databases is done using the
      465        __db_sequence_number__ record.
     466      </para>
     467    </refsect2>
     468
     469    <refsect2>
     470      <title>RecoverTimeout</title>
     471      <para>Default: 120</para>
     472      <para>
     473        This is the default setting for timeouts for controls when sent
     474        from the recovery daemon. We allow longer control timeouts from
     475        the recovery daemon than from normal use since the recovery
      476        daemon often uses controls that can take a lot longer than normal
     477        controls.
     478      </para>
     479    </refsect2>
     480
     481    <refsect2>
     482      <title>RecoveryBanPeriod</title>
     483      <para>Default: 300</para>
     484      <para>
     485       The duration in seconds for which a node is banned if the node
     486       fails during recovery.  After this time has elapsed the node will
     487       automatically get unbanned and will attempt to rejoin the cluster.
     488      </para>
     489      <para>
     490       A node usually gets banned due to real problems with the node.
     491       Don't set this value too small.  Otherwise, a problematic node
      492       will try to re-join the cluster too soon, causing unnecessary recoveries.
     493      </para>
     494    </refsect2>
     495
     496    <refsect2>
     497      <title>RecoveryDropAllIPs</title>
     498      <para>Default: 120</para>
     499      <para>
     500        If a node is stuck in recovery, or stopped, or banned, for this
     501        many seconds, then ctdb will release all public addresses on
     502        that node.
    192503      </para>
    193504    </refsect2>
     
    197508      <para>Default: 120</para>
    198509      <para>
    199         During recoveries, if a node has not caused recovery failures during the
     200         last grace period, any record of past transgressions where the node caused
     201         recovery failures will be forgiven. This resets the ban-counter back to
    202         zero for that node.
    203       </para>
    204     </refsect2>
    205 
    206     <refsect2>
    207       <title>RecoveryBanPeriod</title>
    208       <para>Default: 300</para>
    209       <para>
     210         If a node keeps causing repetitive recovery failures, the node will
     211         eventually become banned from the cluster.
    212         This controls how long the culprit node will be banned from the cluster
    213         before it is allowed to try to join the cluster again.
     214         Don't set this too small. A node gets banned for a reason and it is usually due
    215         to real problems with the node.
    216       </para>
    217     </refsect2>
    218 
    219     <refsect2>
    220       <title>DatabaseHashSize</title>
    221       <para>Default: 100001</para>
    222       <para>
    223         Size of the hash chains for the local store of the tdbs that ctdb manages.
    224       </para>
    225     </refsect2>
    226 
    227     <refsect2>
    228       <title>DatabaseMaxDead</title>
    229       <para>Default: 5</para>
    230       <para>
     231         How many dead records per hash chain in the TDB database do we allow before
    232         the freelist needs to be processed.
     510       During recoveries, if a node has not caused recovery failures
      511       during the last grace period (in seconds), any record of past
      512       transgressions where the node caused recovery failures will be
      513       forgiven. This resets the ban-counter back to zero for that node.
     514      </para>
     515    </refsect2>
     516
     517    <refsect2>
     518      <title>RepackLimit</title>
     519      <para>Default: 10000</para>
     520      <para>
      521        During vacuuming, if the number of freelist records is more than
     522        <varname>RepackLimit</varname>, then the database is repacked
     523        to get rid of the freelist records to avoid fragmentation.
     524      </para>
     525      <para>
     526        Databases are repacked only if both <varname>RepackLimit</varname>
     527        and <varname>VacuumLimit</varname> are exceeded.
    233528      </para>
    234529    </refsect2>
     
    238533      <para>Default: 10</para>
    239534      <para>
    240         Once a recovery has completed, no additional recoveries are permitted
    241         until this timeout has expired.
    242       </para>
    243     </refsect2>
    244 
    245     <refsect2>
    246       <title>EnableBans</title>
     535        Once a recovery has completed, no additional recoveries are
     536        permitted until this timeout in seconds has expired.
     537      </para>
     538    </refsect2>
     539
     540    <refsect2>
     541      <title>Samba3AvoidDeadlocks</title>
     542      <para>Default: 0</para>
     543      <para>
     544        If set to non-zero, enable code that prevents deadlocks with Samba
     545        (only for Samba 3.x).
      546      </para>
      <para>
     547        This should be set to 1 only when using Samba version 3.x
     548        to enable special code in ctdb to avoid deadlock with Samba
     549        version 3.x.  This code is not required for Samba version 4.x
     550        and must not be enabled for Samba 4.x.
     551      </para>
     552    </refsect2>
     553
     554    <refsect2>
     555      <title>SeqnumInterval</title>
     556      <para>Default: 1000</para>
     557      <para>
     558        Some databases have seqnum tracking enabled, so that samba will
      559        be able to detect asynchronously when there have been updates
      560        to the database.  Every time a database is updated, its sequence
     561        number is increased.
     562      </para>
     563      <para>
     564        This tunable is used to specify in milliseconds how frequently
     565        ctdb will send out updates to remote nodes to inform them that
     566        the sequence number is increased.
      567        the sequence number has increased.
     568    </refsect2>
     569
     570    <refsect2>
     571      <title>StatHistoryInterval</title>
    247572      <para>Default: 1</para>
    248573      <para>
    249         When set to 0, this disables BANNING completely in the cluster and thus
    250         nodes can not get banned, even it they break. Don't set to 0 unless you
    251         know what you are doing.  You should set this to the same value on
    252         all nodes to avoid unexpected behaviour.
    253       </para>
    254     </refsect2>
    255 
    256     <refsect2>
    257       <title>DeterministicIPs</title>
    258       <para>Default: 0</para>
    259       <para>
    260         When enabled, this tunable makes ctdb try to keep public IP addresses
    261         locked to specific nodes as far as possible. This makes it easier for
    262         debugging since you can know that as long as all nodes are healthy
    263         public IP X will always be hosted by node Y.
    264       </para>
    265       <para>
    266         The cost of using deterministic IP address assignment is that it
    267         disables part of the logic where ctdb tries to reduce the number of
    268         public IP assignment changes in the cluster. This tunable may increase
    269         the number of IP failover/failbacks that are performed on the cluster
    270         by a small margin.
    271       </para>
    272 
    273     </refsect2>
    274     <refsect2>
    275       <title>LCP2PublicIPs</title>
    276       <para>Default: 1</para>
    277       <para>
    278         When enabled this switches ctdb to use the LCP2 ip allocation
    279         algorithm.
    280       </para>
    281     </refsect2>
    282 
    283     <refsect2>
    284       <title>ReclockPingPeriod</title>
    285       <para>Default: x</para>
    286       <para>
    287         Obsolete
    288       </para>
    289     </refsect2>
    290 
    291     <refsect2>
    292       <title>NoIPFailback</title>
    293       <para>Default: 0</para>
    294       <para>
    295         When set to 1, ctdb will not perform failback of IP addresses when a node
    296         becomes healthy. Ctdb WILL perform failover of public IP addresses when a
    297         node becomes UNHEALTHY, but when the node becomes HEALTHY again, ctdb
    298         will not fail the addresses back.
    299       </para>
    300       <para>
    301         Use with caution! Normally when a node becomes available to the cluster
    302         ctdb will try to reassign public IP addresses onto the new node as a way
     303         to distribute the workload evenly across the cluster nodes. Ctdb tries to
    304         make sure that all running nodes have approximately the same number of
    305         public addresses it hosts.
    306       </para>
    307       <para>
    308         When you enable this tunable, CTDB will no longer attempt to rebalance
    309         the cluster by failing IP addresses back to the new nodes. An unbalanced
    310         cluster will therefore remain unbalanced until there is manual
    311         intervention from the administrator. When this parameter is set, you can
    312         manually fail public IP addresses over to the new node(s) using the
    313         'ctdb moveip' command.
    314       </para>
    315     </refsect2>
    316 
    317     <refsect2>
    318       <title>DisableIPFailover</title>
    319       <para>Default: 0</para>
    320       <para>
    321         When enabled, ctdb will not perform failover or failback. Even if a
    322         node fails while holding public IPs, ctdb will not recover the IPs or
    323         assign them to another node.
    324       </para>
    325       <para>
    326         When you enable this tunable, CTDB will no longer attempt to recover
    327         the cluster by failing IP addresses over to other nodes. This leads to
    328         a service outage until the administrator has manually performed failover
    329         to replacement nodes using the 'ctdb moveip' command.
    330       </para>
    331     </refsect2>
    332 
    333     <refsect2>
    334       <title>NoIPTakeover</title>
    335       <para>Default: 0</para>
    336       <para>
    337         When set to 1, ctdb will not allow IP addresses to be failed over
    338         onto this node. Any IP addresses that the node currently hosts
    339         will remain on the node but no new IP addresses can be failed over
    340         to the node.
    341       </para>
    342     </refsect2>
    343 
    344     <refsect2>
    345       <title>NoIPHostOnAllDisabled</title>
    346       <para>Default: 0</para>
    347       <para>
    348         If no nodes are healthy then by default ctdb will happily host
    349         public IPs on disabled (unhealthy or administratively disabled)
    350         nodes.  This can cause problems, for example if the underlying
    351         cluster filesystem is not mounted.  When set to 1 on a node and
     352         that node is disabled, any IPs hosted by this node will be
    353         released and the node will not takeover any IPs until it is no
    354         longer disabled.
    355       </para>
    356     </refsect2>
    357 
    358     <refsect2>
    359       <title>DBRecordCountWarn</title>
    360       <para>Default: 100000</para>
    361       <para>
    362         When set to non-zero, ctdb will log a warning when we try to recover a
    363         database with more than this many records. This will produce a warning
    364         if a database grows uncontrollably with orphaned records.
    365       </para>
    366     </refsect2>
    367 
    368     <refsect2>
    369       <title>DBRecordSizeWarn</title>
    370       <para>Default: 10000000</para>
    371       <para>
    372         When set to non-zero, ctdb will log a warning when we try to recover a
    373         database where a single record is bigger than this. This will produce
    374         a warning if a database record grows uncontrollably with orphaned
    375         sub-records.
    376       </para>
    377     </refsect2>
    378 
    379     <refsect2>
    380       <title>DBSizeWarn</title>
    381       <para>Default: 1000000000</para>
    382       <para>
    383         When set to non-zero, ctdb will log a warning when we try to recover a
    384         database bigger than this. This will produce
    385         a warning if a database grows uncontrollably.
    386       </para>
    387     </refsect2>
    388 
    389     <refsect2>
    390       <title>VerboseMemoryNames</title>
    391       <para>Default: 0</para>
    392       <para>
     393         This feature consumes additional memory. When used, the talloc library
    394         will create more verbose names for all talloc allocated objects.
    395       </para>
    396     </refsect2>
    397 
    398     <refsect2>
    399       <title>RecdPingTimeout</title>
     574        Granularity of the statistics collected in the statistics
      575        history. This is reported by the 'ctdb stats' command.
     576      </para>
     577    </refsect2>
     578
     579    <refsect2>
     580      <title>StickyDuration</title>
     581      <para>Default: 600</para>
     582      <para>
      583        Once a record has been marked STICKY, this is the duration, in
      584        seconds, for which the record will be flagged as a STICKY record.
     585      </para>
     586    </refsect2>
     587
     588    <refsect2>
     589      <title>StickyPindown</title>
     590      <para>Default: 200</para>
     591      <para>
     592        Once a STICKY record has been migrated onto a node, it will be
     593        pinned down on that node for this number of milliseconds. Any
     594        request from other nodes to migrate the record off the node will
     595        be deferred.
     596      </para>
     597    </refsect2>
     598
     599    <refsect2>
     600      <title>TakeoverTimeout</title>
     601      <para>Default: 9</para>
     602      <para>
      603        This is the duration in seconds within which ctdb tries to
      604        complete IP failover.
     605      </para>
     606    </refsect2>
     607
     608    <refsect2>
     609      <title>TDBMutexEnabled</title>
     610      <para>Default: 0</para>
     611      <para>
      612        This parameter enables the TDB_MUTEX_LOCKING feature on volatile
      613        databases if robust mutexes are supported. This optimizes the
      614        record locking using robust mutexes and is much more efficient
      615        than using POSIX locks.
     616      </para>
     617    </refsect2>
     618
     619    <refsect2>
     620      <title>TickleUpdateInterval</title>
     621      <para>Default: 20</para>
     622      <para>
     623        Every <varname>TickleUpdateInterval</varname> seconds, ctdb
     624        synchronizes the client connection information across nodes.
     625      </para>
     626    </refsect2>
     627
     628    <refsect2>
     629      <title>TraverseTimeout</title>
     630      <para>Default: 20</para>
     631      <para>
     632        This is the duration in seconds for which a database traverse
     633        is allowed to run.  If the traverse does not complete during
     634        this interval, ctdb will abort the traverse.
     635      </para>
     636    </refsect2>
     637
     638    <refsect2>
     639      <title>VacuumFastPathCount</title>
    400640      <para>Default: 60</para>
    401641      <para>
     402         If the main daemon has not heard a "ping" from the recovery daemon for
     403         this many seconds, the main daemon will log a message that the recovery
    404         daemon is potentially hung.
    405       </para>
    406     </refsect2>
    407 
    408     <refsect2>
    409       <title>RecdFailCount</title>
    410       <para>Default: 10</para>
    411       <para>
     412         If the recovery daemon has failed to ping the main daemon for this many
    413         consecutive intervals, the main daemon will consider the recovery daemon
    414         as hung and will try to restart it to recover.
    415       </para>
    416     </refsect2>
    417 
    418     <refsect2>
    419       <title>LogLatencyMs</title>
    420       <para>Default: 0</para>
    421       <para>
    422         When set to non-zero, this will make the main daemon log any operation that
    423         took longer than this value, in 'ms', to complete.
     424         These include "how long a lockwait child process needed",
     425         "how long it took to write to a persistent database" but also
    426         "how long did it take to get a response to a CALL from a remote node".
    427       </para>
    428     </refsect2>
    429 
    430     <refsect2>
    431       <title>RecLockLatencyMs</title>
    432       <para>Default: 1000</para>
    433       <para>
    434         When using a reclock file for split brain prevention, if set to non-zero
     435         this tunable will make the recovery daemon log a message if the fcntl()
    436         call to lock/testlock the recovery file takes longer than this number of
    437         ms.
    438       </para>
    439     </refsect2>
    440 
    441     <refsect2>
    442       <title>RecoveryDropAllIPs</title>
    443       <para>Default: 120</para>
    444       <para>
     445         If we have been stuck in recovery mode, or stopped, or banned, for
     446         this many seconds we will force drop all held public addresses.
      642       During a vacuuming run, ctdb usually processes only the records
      643       marked for deletion; this is called fast path vacuuming. After
      644       finishing <varname>VacuumFastPathCount</varname> fast path
      645       vacuuming runs, ctdb will trigger a scan of the complete database
      646       for any empty records that need to be deleted.
    447647      </para>
    448648    </refsect2>
     
    454654        Periodic interval in seconds when vacuuming is triggered for
    455655        volatile databases.
     656      </para>
     657    </refsect2>
     658
     659    <refsect2>
     660      <title>VacuumLimit</title>
     661      <para>Default: 5000</para>
     662      <para>
      663        During vacuuming, if the number of deleted records is more than
     664        <varname>VacuumLimit</varname>, then databases are repacked to
     665        avoid fragmentation.
     666      </para>
     667      <para>
     668        Databases are repacked only if both <varname>RepackLimit</varname>
     669        and <varname>VacuumLimit</varname> are exceeded.
    456670      </para>
    457671    </refsect2>
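      The combined rule stated in RepackLimit and VacuumLimit reduces to a
      single conjunction; a tiny illustrative C helper (should_repack is an
      invented name, not ctdb source):

    #include <stdbool.h>

    #define REPACK_LIMIT 10000  /* RepackLimit: freelist records */
    #define VACUUM_LIMIT 5000   /* VacuumLimit: deleted records  */

    static bool should_repack(unsigned freelist_records,
                              unsigned deleted_records)
    {
            /* Repack only when BOTH thresholds are exceeded. */
            return freelist_records > REPACK_LIMIT &&
                   deleted_records > VACUUM_LIMIT;
    }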
     
    468682
    469683    <refsect2>
    470       <title>RepackLimit</title>
    471       <para>Default: 10000</para>
    472       <para>
    473         During vacuuming, if the number of freelist records are more
    474         than <varname>RepackLimit</varname>, then databases are
    475         repacked to get rid of the freelist records to avoid
    476         fragmentation.
    477       </para>
    478       <para>
    479         Databases are repacked only if both
    480         <varname>RepackLimit</varname> and
    481         <varname>VacuumLimit</varname> are exceeded.
    482       </para>
    483     </refsect2>
    484 
    485     <refsect2>
    486       <title>VacuumLimit</title>
    487       <para>Default: 5000</para>
    488       <para>
    489         During vacuuming, if the number of deleted records are more
    490         than <varname>VacuumLimit</varname>, then databases are
    491         repacked to avoid fragmentation.
    492       </para>
    493       <para>
    494         Databases are repacked only if both
    495         <varname>RepackLimit</varname> and
    496         <varname>VacuumLimit</varname> are exceeded.
    497       </para>
    498     </refsect2>
    499 
    500     <refsect2>
    501       <title>VacuumFastPathCount</title>
    502       <para>Default: 60</para>
    503       <para>
    504         When a record is deleted, it is marked for deletion during
    505         vacuuming.  Vacuuming process usually processes this list to purge
    506         the records from the database.  If the number of records marked
    507         for deletion are more than VacuumFastPathCount, then vacuuming
    508         process will scan the complete database for empty records instead
    509         of using the list of records marked for deletion.
    510       </para>
    511     </refsect2>
    512 
    513     <refsect2>
    514       <title>DeferredAttachTO</title>
    515       <para>Default: 120</para>
    516       <para>
    517         When databases are frozen we do not allow clients to attach to the
     518         databases. Instead of returning an error immediately to the application,
    519         the attach request from the client is deferred until the database
    520         becomes available again at which stage we respond to the client.
    521       </para>
    522       <para>
    523         This timeout controls how long we will defer the request from the client
    524         before timing it out and returning an error to the client.
    525       </para>
    526     </refsect2>
    527 
    528     <refsect2>
    529       <title>HopcountMakeSticky</title>
    530       <para>Default: 50</para>
    531       <para>
    532         If the database is set to 'STICKY' mode, using the 'ctdb setdbsticky'
    533         command, any record that is seen as very hot and migrating so fast that
    534         hopcount surpasses 50 is set to become a STICKY record for StickyDuration
    535         seconds. This means that after each migration the record will be kept on
    536         the node and prevented from being migrated off the node.
    537       </para>
    538       <para>
    539         This setting allows one to try to identify such records and stop them from
    540         migrating across the cluster so fast. This will improve performance for
    541         certain workloads, such as locking.tdb if many clients are opening/closing
    542         the same file concurrently.
    543       </para>
    544     </refsect2>
    545 
    546     <refsect2>
    547       <title>StickyDuration</title>
    548       <para>Default: 600</para>
    549       <para>
    550         Once a record has been found to be fetch-lock hot and has been flagged to
    551         become STICKY, this is for how long, in seconds, the record will be
    552         flagged as a STICKY record.
    553       </para>
    554     </refsect2>
    555 
    556     <refsect2>
    557       <title>StickyPindown</title>
    558       <para>Default: 200</para>
    559       <para>
    560         Once a STICKY record has been migrated onto a node, it will be pinned down
    561         on that node for this number of ms. Any request from other nodes to migrate
    562         the record off the node will be deferred until the pindown timer expires.
    563       </para>
    564     </refsect2>
    565 
    566     <refsect2>
    567       <title>StatHistoryInterval</title>
    568       <para>Default: 1</para>
    569       <para>
    570         Granularity of the statistics collected in the statistics history.
    571       </para>
    572     </refsect2>
    573 
    574     <refsect2>
    575       <title>AllowClientDBAttach</title>
    576       <para>Default: 1</para>
    577       <para>
    578         When set to 0, clients are not allowed to attach to any databases.
    579         This can be used to temporarily block any new processes from attaching
    580         to and accessing the databases.
    581       </para>
    582     </refsect2>
    583 
    584     <refsect2>
    585       <title>RecoverPDBBySeqNum</title>
    586       <para>Default: 1</para>
    587       <para>
    588         When set to zero, database recovery for persistent databases
    589         is record-by-record and recovery process simply collects the
    590         most recent version of every individual record.
    591       </para>
    592       <para>
    593         When set to non-zero, persistent databases will instead be
    594         recovered as a whole db and not by individual records. The
    595         node that contains the highest value stored in the record
    596         "__db_sequence_number__" is selected and the copy of that
     597         node's database is used as the recovered database.
    598       </para>
    599       <para>
     600         By default, recovery of persistent databases is done using
    601         __db_sequence_number__ record.
    602       </para>
    603     </refsect2>
    604 
    605     <refsect2>
    606       <title>FetchCollapse</title>
    607       <para>Default: 1</para>
    608       <para>
    609         When many clients across many nodes try to access the same record at the
    610         same time this can lead to a fetch storm where the record becomes very
    611         active and bounces between nodes very fast. This leads to high CPU
    612         utilization of the ctdbd daemon, trying to bounce that record around
    613         very fast, and poor performance.
    614       </para>
    615       <para>
    616         This parameter is used to activate a fetch-collapse. A fetch-collapse
     617         is when we track which records have requests in flight so that we only
     618         keep one request in flight from a certain node, even if multiple smbd
     619         processes are attempting to fetch the record at the same time. This
    620         can improve performance and reduce CPU utilization for certain
    621         workloads.
    622       </para>
    623       <para>
    624         This timeout controls if we should collapse multiple fetch operations
    625         of the same record into a single request and defer all duplicates or not.
    626       </para>
    627     </refsect2>
    628 
    629     <refsect2>
    630       <title>Samba3AvoidDeadlocks</title>
    631       <para>Default: 0</para>
    632       <para>
    633         Enable code that prevents deadlocks with Samba (only for Samba 3.x).
    634       </para>
    635       <para>
    636         This should be set to 1 when using Samba version 3.x to enable special
    637         code in CTDB to avoid deadlock with Samba version 3.x.  This code
    638         is not required for Samba version 4.x and must not be enabled for
    639         Samba 4.x.
    640       </para>
    641     </refsect2>
     684      <title>VerboseMemoryNames</title>
     685      <para>Default: 0</para>
     686      <para>
     687        When set to non-zero, ctdb assigns verbose names for some of
     688        the talloc allocated memory objects.  These names are visible
     689        in the talloc memory report generated by 'ctdb dumpmemory'.
     690      </para>
     691    </refsect2>
     692
    642693  </refsect1>
    643694
  • vendor/current/ctdb/include/ctdb_private.h

    r988 r989  
    438438        uint32_t freeze_transaction_id;
    439439        uint32_t generation;
     440
     441        bool push_started;
     442        void *push_state;
    440443};
    441444
     
    874877int32_t ctdb_control_push_db(struct ctdb_context *ctdb, TDB_DATA indata);
    875878
     879int32_t ctdb_control_db_pull(struct ctdb_context *ctdb,
     880                             struct ctdb_req_control_old *c,
     881                             TDB_DATA indata, TDB_DATA *outdata);
     882int32_t ctdb_control_db_push_start(struct ctdb_context *ctdb,
     883                                   TDB_DATA indata);
     884int32_t ctdb_control_db_push_confirm(struct ctdb_context *ctdb,
     885                                     TDB_DATA indata, TDB_DATA *outdata);
     886
    876887int ctdb_deferred_drop_all_ips(struct ctdb_context *ctdb);
    877888
  • vendor/current/ctdb/packaging/RPM/ctdb.spec.in

    r988 r989  
    208208%dir %{_libdir}
    209209%{_libdir}/ctdb/lib*
    210 %{_libdir}/libtevent-unix-util.so.0*
    211210%{_mandir}/man1/ctdb.1.gz
    212211%{_mandir}/man1/ctdbd.1.gz
     
    230229%files devel
    231230%defattr(-,root,root)
    232 %{_includedir}/ctdb/util/*.h
    233 %{_libdir}/libtevent-unix-util.so
    234231
    235232%package tests
     
    246243%dir %{_datadir}/%{name}-tests
    247244%{_datadir}/%{name}-tests/*
    248 %dir %{_libdir}/%{name}-tests
    249 %{_libdir}/%{name}-tests/*
     245%dir %{_libexecdir}/%{name}/tests
     246%{_libexecdir}/%{name}/tests/*
    250247%{_bindir}/ctdb_run_tests
    251248%{_bindir}/ctdb_run_cluster_tests
  • vendor/current/ctdb/protocol/protocol.h

    r988 r989  
    121121/* SRVID prefix used by CTDB */
    122122#define CTDB_SRVID_PREFIX       0xF000000000000000LL
     123
     124/* SRVID prefix used during recovery for pulling and pushing databases */
     125#define CTDB_SRVID_RECOVERY     0xF001000000000000LL
     126
      127/* SRVID to assign banning credits */
     128#define CTDB_SRVID_BANNING      0xF002000000000000LL
    123129
    124130/* SRVID to inform of election data */
     
    356362                    CTDB_CONTROL_DB_TRANSACTION_COMMIT   = 144,
    357363                    CTDB_CONTROL_DB_TRANSACTION_CANCEL   = 145,
     364                    CTDB_CONTROL_DB_PULL                 = 146,
     365                    CTDB_CONTROL_DB_PUSH_START           = 147,
     366                    CTDB_CONTROL_DB_PUSH_CONFIRM         = 148,
    358367};
    359368
     
    456465#define CTDB_LMASTER_ANY        0xffffffff
    457466        uint32_t lmaster;
     467};
     468
     469struct ctdb_pulldb_ext {
     470        uint32_t db_id;
     471        uint32_t lmaster;
     472        uint64_t srvid;
    458473};
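For illustration, filling in the new control blob might look like the
following; the helper name and the choice of CTDB_LMASTER_ANY are assumptions,
not code from this changeset:

    #include <stdint.h>
    #include "protocol/protocol.h"  /* struct ctdb_pulldb_ext, CTDB_LMASTER_ANY */

    static struct ctdb_pulldb_ext make_pulldb_ext(uint32_t db_id,
                                                  uint64_t srvid)
    {
            struct ctdb_pulldb_ext pulldb_ext = {
                    .db_id = db_id,               /* database to transfer */
                    .lmaster = CTDB_LMASTER_ANY,  /* no lmaster filtering */
                    .srvid = srvid,               /* e.g. derived from
                                                     CTDB_SRVID_RECOVERY */
            };

            return pulldb_ext;
    }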
    459474
     
    620635        uint32_t mutex_enabled;
    621636        uint32_t lock_processes_per_db;
     637        uint32_t rec_buffer_size_limit;
    622638};
    623639
     
    705721 */
    706722#define CTDB_CAP_PARALLEL_RECOVERY      0x00010000
    707 
    708 #define CTDB_CAP_FEATURES               (CTDB_CAP_PARALLEL_RECOVERY)
     723#define CTDB_CAP_FRAGMENTED_CONTROLS    0x00020000
     724
     725#define CTDB_CAP_FEATURES               (CTDB_CAP_PARALLEL_RECOVERY | \
     726                                         CTDB_CAP_FRAGMENTED_CONTROLS)
    709727
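Illustrative only: a peer's capability mask can be tested for the new bit
before relying on the new controls; the helper name is invented and the
macro simply mirrors the definition above:

    #include <stdbool.h>
    #include <stdint.h>

    #define CTDB_CAP_FRAGMENTED_CONTROLS    0x00020000  /* as added above */

    static bool peer_supports_fragmented_controls(uint32_t peer_caps)
    {
            return (peer_caps & CTDB_CAP_FRAGMENTED_CONTROLS) != 0;
    }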
    710728#define CTDB_CAP_DEFAULT                (CTDB_CAP_RECMASTER | \
     
    856874                uint32_t loglevel;
    857875                struct ctdb_pulldb *pulldb;
     876                struct ctdb_pulldb_ext *pulldb_ext;
    858877                struct ctdb_rec_buffer *recbuf;
    859878                uint32_t recmode;
     
    924943                struct ctdb_db_statistics *dbstats;
    925944                enum ctdb_runstate runstate;
     945                uint32_t num_records;
    926946        } data;
    927947};
     
    977997        /* SRVID_MEM_DUMP, SRVID_TAKEOVER_RUN */
    978998        struct ctdb_srvid_message *msg;
    979         /* SRVID_REBALANCE_NODE */
     999        /* SRVID_BANNING, SRVID_REBALANCE_NODE */
    9801000        uint32_t pnn;
    9811001        /* SRVID_DISABLE_TAKEOVER_RUNS, SRVID_DISABLE_RECOVERIES */
  • vendor/current/ctdb/protocol/protocol_api.h

    r988 r989  
    5353                             void *private_data);
    5454
     55int ctdb_rec_buffer_write(struct ctdb_rec_buffer *recbuf, int fd);
     56int ctdb_rec_buffer_read(int fd, TALLOC_CTX *mem_ctx,
     57                         struct ctdb_rec_buffer **out);
     58
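A hedged sketch of the new fd-based (de)serialisation round trip; only
ctdb_rec_buffer_write() and ctdb_rec_buffer_read() are from this changeset,
while the wrapper and the assumption that recbuf was built elsewhere are
illustrative:

    #include <talloc.h>
    #include "protocol/protocol_api.h"

    static int rec_buffer_roundtrip(TALLOC_CTX *mem_ctx,
                                    struct ctdb_rec_buffer *recbuf,
                                    int fd_out, int fd_in,
                                    struct ctdb_rec_buffer **out)
    {
            int ret;

            ret = ctdb_rec_buffer_write(recbuf, fd_out);  /* e.g. a pipe */
            if (ret != 0) {
                    return ret;
            }

            return ctdb_rec_buffer_read(fd_in, mem_ctx, out);
    }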
    5559size_t ctdb_server_id_len(struct ctdb_server_id *sid);
    5660void ctdb_server_id_push(struct ctdb_server_id *sid, uint8_t *buf);
     
    635639int ctdb_reply_control_db_transaction_cancel(struct ctdb_reply_control *reply);
    636640
     641void ctdb_req_control_db_pull(struct ctdb_req_control *request,
     642                              struct ctdb_pulldb_ext *pulldb_ext);
     643int ctdb_reply_control_db_pull(struct ctdb_reply_control *reply,
     644                               uint32_t *num_records);
     645
     646void ctdb_req_control_db_push_start(struct ctdb_req_control *request,
     647                                    struct ctdb_pulldb_ext *pulldb_ext);
     648int ctdb_reply_control_db_push_start(struct ctdb_reply_control *reply);
     649
     650void ctdb_req_control_db_push_confirm(struct ctdb_req_control *request,
     651                                      uint32_t db_id);
     652int ctdb_reply_control_db_push_confirm(struct ctdb_reply_control *reply,
     653                                       uint32_t *num_records);
     654
    637655/* From protocol/protocol_message.c */
    638656
  • vendor/current/ctdb/protocol/protocol_client.c

    r988 r989  
    24692469        return ctdb_reply_control_generic(reply);
    24702470}
     2471
     2472/* CTDB_CONTROL_DB_PULL */
     2473
     2474void ctdb_req_control_db_pull(struct ctdb_req_control *request,
     2475                              struct ctdb_pulldb_ext *pulldb_ext)
     2476{
     2477        request->opcode = CTDB_CONTROL_DB_PULL;
     2478        request->pad = 0;
     2479        request->srvid = 0;
     2480        request->client_id = 0;
     2481        request->flags = 0;
     2482
     2483        request->rdata.opcode = CTDB_CONTROL_DB_PULL;
     2484        request->rdata.data.pulldb_ext = pulldb_ext;
     2485}
     2486
     2487int ctdb_reply_control_db_pull(struct ctdb_reply_control *reply,
     2488                               uint32_t *num_records)
     2489{
     2490        if (reply->status == 0 &&
     2491            reply->rdata.opcode == CTDB_CONTROL_DB_PULL) {
     2492                *num_records = reply->rdata.data.num_records;
     2493        }
     2494        return reply->status;
     2495}
     2496
     2497/* CTDB_CONTROL_DB_PUSH_START */
     2498
     2499void ctdb_req_control_db_push_start(struct ctdb_req_control *request,
     2500                                    struct ctdb_pulldb_ext *pulldb_ext)
     2501{
     2502        request->opcode = CTDB_CONTROL_DB_PUSH_START;
     2503        request->pad = 0;
     2504        request->srvid = 0;
     2505        request->client_id = 0;
     2506        request->flags = 0;
     2507
     2508        request->rdata.opcode = CTDB_CONTROL_DB_PUSH_START;
     2509        request->rdata.data.pulldb_ext = pulldb_ext;
     2510}
     2511
     2512int ctdb_reply_control_db_push_start(struct ctdb_reply_control *reply)
     2513{
     2514        return ctdb_reply_control_generic(reply);
     2515}
     2516
     2517/* CTDB_CONTROL_DB_PUSH_CONFIRM */
     2518
     2519void ctdb_req_control_db_push_confirm(struct ctdb_req_control *request,
     2520                                      uint32_t db_id)
     2521{
     2522        request->opcode = CTDB_CONTROL_DB_PUSH_CONFIRM;
     2523        request->pad = 0;
     2524        request->srvid = 0;
     2525        request->client_id = 0;
     2526        request->flags = 0;
     2527
     2528        request->rdata.opcode = CTDB_CONTROL_DB_PUSH_CONFIRM;
     2529        request->rdata.data.db_id = db_id;
     2530}
     2531
     2532int ctdb_reply_control_db_push_confirm(struct ctdb_reply_control *reply,
     2533                                       uint32_t *num_records)
     2534{
     2535        if (reply->status == 0 &&
     2536            reply->rdata.opcode == CTDB_CONTROL_DB_PUSH_CONFIRM) {
     2537                *num_records = reply->rdata.data.num_records;
     2538        }
     2539        return reply->status;
     2540}
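A sketch of how the new marshalling helpers might be driven;
send_control_and_wait() is a hypothetical stand-in for the transport layer,
not a ctdb function:

    #include <stdint.h>
    #include "protocol/protocol.h"
    #include "protocol/protocol_api.h"

    /* Hypothetical transport helper, not part of ctdb. */
    struct ctdb_reply_control *send_control_and_wait(
                                    struct ctdb_req_control *request);

    static int do_db_pull(struct ctdb_pulldb_ext *pulldb_ext,
                          uint32_t *num_records)
    {
            struct ctdb_req_control request;
            struct ctdb_reply_control *reply;

            ctdb_req_control_db_pull(&request, pulldb_ext);

            reply = send_control_and_wait(&request);  /* hypothetical */
            if (reply == NULL) {
                    return -1;
            }

            /* Returns the control status; fills num_records on success. */
            return ctdb_reply_control_db_pull(reply, num_records);
    }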
  • vendor/current/ctdb/protocol/protocol_control.c

    r988 r989  
    5050{
    5151        size_t len = 0;
    52         uint64_t u64;
    5352
    5453        if (cd == NULL) {
     
    385384
    386385        case CTDB_CONTROL_GET_DB_SEQNUM:
    387                 u64 = cd->data.db_id;
    388                 len = ctdb_uint64_len(u64);
     386                len = ctdb_uint64_len((uint64_t)cd->data.db_id);
    389387                break;
    390388
     
    480478
    481479        case CTDB_CONTROL_DB_TRANSACTION_CANCEL:
     480                len = ctdb_uint32_len(cd->data.db_id);
     481                break;
     482
     483        case CTDB_CONTROL_DB_PULL:
     484                len = ctdb_pulldb_ext_len(cd->data.pulldb_ext);
     485                break;
     486
     487        case CTDB_CONTROL_DB_PUSH_START:
     488                len = ctdb_pulldb_ext_len(cd->data.pulldb_ext);
     489                break;
     490
     491        case CTDB_CONTROL_DB_PUSH_CONFIRM:
    482492                len = ctdb_uint32_len(cd->data.db_id);
    483493                break;
     
    490500                                       uint8_t *buf)
    491501{
    492         uint64_t u64;
    493 
    494502        switch (cd->opcode) {
    495503        case CTDB_CONTROL_PROCESS_EXISTS:
     
    713721
    714722        case CTDB_CONTROL_GET_DB_SEQNUM:
    715                 u64 = cd->data.db_id;
    716                 ctdb_uint64_push(u64, buf);
     723                ctdb_uint32_push(cd->data.db_id, buf);
    717724                break;
    718725
     
    790797
    791798        case CTDB_CONTROL_DB_TRANSACTION_CANCEL:
     799                ctdb_uint32_push(cd->data.db_id, buf);
     800                break;
     801
     802        case CTDB_CONTROL_DB_PULL:
     803                ctdb_pulldb_ext_push(cd->data.pulldb_ext, buf);
     804                break;
     805
     806        case CTDB_CONTROL_DB_PUSH_START:
     807                ctdb_pulldb_ext_push(cd->data.pulldb_ext, buf);
     808                break;
     809
     810        case CTDB_CONTROL_DB_PUSH_CONFIRM:
    792811                ctdb_uint32_push(cd->data.db_id, buf);
    793812                break;
     
    801820{
    802821        int ret = 0;
    803         uint64_t u64 = 0;
    804822
    805823        cd->opcode = opcode;
     
    10801098
    10811099        case CTDB_CONTROL_GET_DB_SEQNUM:
    1082                 ret = ctdb_uint64_pull(buf, buflen, mem_ctx, &u64);
    1083                 cd->data.db_id = (uint32_t)u64;
     1100                ret = ctdb_uint32_pull(buf, buflen, mem_ctx,
     1101                                       &cd->data.db_id);
    10841102                break;
    10851103
     
    11781196                                        &cd->data.db_id);
    11791197                break;
     1198
     1199        case CTDB_CONTROL_DB_PULL:
     1200                ret = ctdb_pulldb_ext_pull(buf, buflen, mem_ctx,
     1201                                           &cd->data.pulldb_ext);
     1202                break;
     1203
     1204        case CTDB_CONTROL_DB_PUSH_START:
     1205                ret = ctdb_pulldb_ext_pull(buf, buflen, mem_ctx,
     1206                                           &cd->data.pulldb_ext);
     1207                break;
     1208
     1209        case CTDB_CONTROL_DB_PUSH_CONFIRM:
     1210                ret = ctdb_uint32_pull(buf, buflen, mem_ctx,
     1211                                       &cd->data.db_id);
     1212                break;
    11801213        }
    11811214
     
    15741607
    15751608        case CTDB_CONTROL_DB_TRANSACTION_CANCEL:
     1609                break;
     1610
     1611        case CTDB_CONTROL_DB_PULL:
     1612                len = ctdb_uint32_len(cd->data.num_records);
     1613                break;
     1614
     1615        case CTDB_CONTROL_DB_PUSH_START:
     1616                break;
     1617
     1618        case CTDB_CONTROL_DB_PUSH_CONFIRM:
     1619                len = ctdb_uint32_len(cd->data.num_records);
    15761620                break;
    15771621        }
     
    17271771                ctdb_node_map_push(cd->data.nodemap, buf);
    17281772                break;
     1773
     1774        case CTDB_CONTROL_DB_PULL:
     1775                ctdb_uint32_push(cd->data.num_records, buf);
     1776                break;
     1777
     1778        case CTDB_CONTROL_DB_PUSH_CONFIRM:
     1779                ctdb_uint32_push(cd->data.num_records, buf);
     1780                break;
    17291781        }
    17301782}
     
    19131965                ret = ctdb_node_map_pull(buf, buflen, mem_ctx,
    19141966                                         &cd->data.nodemap);
     1967                break;
     1968
     1969        case CTDB_CONTROL_DB_PULL:
     1970                ret = ctdb_uint32_pull(buf, buflen, mem_ctx,
     1971                                       &cd->data.num_records);
     1972                break;
     1973
     1974        case CTDB_CONTROL_DB_PUSH_CONFIRM:
     1975                ret = ctdb_uint32_pull(buf, buflen, mem_ctx,
     1976                                       &cd->data.num_records);
    19151977                break;
    19161978        }
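
The new opcodes above follow this file's fixed three-part pattern: a _len() function sizes the payload, _push() serialises it into a buffer the caller allocated from that size, and _pull() talloc-allocates the structure back from the wire. A hedged sketch of marshalling a DB_PULL request payload with the helpers added above (db_id, srvid and mem_ctx are assumed to be in scope):

    struct ctdb_pulldb_ext pulldb_ext = {
        .db_id   = db_id,              /* database to transfer */
        .lmaster = CTDB_LMASTER_ANY,
        .srvid   = srvid,              /* where record buffers are sent */
    };
    size_t len = ctdb_pulldb_ext_len(&pulldb_ext);
    uint8_t *buf = talloc_size(mem_ctx, len);

    if (buf != NULL) {
        /* buf has exactly len bytes, so push cannot overrun */
        ctdb_pulldb_ext_push(&pulldb_ext, buf);
    }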
  • vendor/current/ctdb/protocol/protocol_message.c

    r988 r989  
    4141
    4242        switch (srvid) {
     43        case CTDB_SRVID_BANNING:
     44                len = ctdb_uint32_len(mdata->pnn);
     45                break;
     46
    4347        case CTDB_SRVID_ELECTION:
    4448                len = ctdb_election_message_len(mdata->election);
     
    115119{
    116120        switch (srvid) {
     121        case CTDB_SRVID_BANNING:
     122                ctdb_uint32_push(mdata->pnn, buf);
     123                break;
     124
    117125        case CTDB_SRVID_ELECTION:
    118126                ctdb_election_message_push(mdata->election, buf);
     
    190198
    191199        switch (srvid) {
     200        case CTDB_SRVID_BANNING:
     201                ret = ctdb_uint32_pull(buf, buflen, mem_ctx, &mdata->pnn);
     202                break;
     203
    192204        case CTDB_SRVID_ELECTION:
    193205                ret = ctdb_election_message_pull(buf, buflen, mem_ctx,
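
With the marshalling in place, constructing a banning message from client code is just a matter of filling the public request type; the payload is the PNN that should be given banning credits. A minimal sketch, assuming mem_ctx, ev, client, the culprit PNN and the recovery master's PNN are in scope:

    struct ctdb_req_message message;
    int ret;

    message.srvid = CTDB_SRVID_BANNING;
    message.data.pnn = culprit_pnn;    /* node to penalise */

    ret = ctdb_client_message(mem_ctx, ev, client, recmaster, &message);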
  • vendor/current/ctdb/protocol/protocol_private.h

    r988 r989  
    9191int ctdb_pulldb_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
    9292                     struct ctdb_pulldb **out);
     93
     94size_t ctdb_pulldb_ext_len(struct ctdb_pulldb_ext *pulldb);
     95void ctdb_pulldb_ext_push(struct ctdb_pulldb_ext *pulldb, uint8_t *buf);
     96int ctdb_pulldb_ext_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
     97                         struct ctdb_pulldb_ext **out);
    9398
    9499size_t ctdb_traverse_start_len(struct ctdb_traverse_start *traverse);
  • vendor/current/ctdb/protocol/protocol_types.c

    r988 r989  
    466466
    467467        pulldb = talloc_memdup(mem_ctx, buf, sizeof(struct ctdb_pulldb));
     468        if (pulldb == NULL) {
     469                return ENOMEM;
     470        }
     471
     472        *out = pulldb;
     473        return 0;
     474}
     475
     476size_t ctdb_pulldb_ext_len(struct ctdb_pulldb_ext *pulldb)
     477{
     478        return sizeof(struct ctdb_pulldb_ext);
     479}
     480
     481void ctdb_pulldb_ext_push(struct ctdb_pulldb_ext *pulldb, uint8_t *buf)
     482{
     483        memcpy(buf, pulldb, sizeof(struct ctdb_pulldb_ext));
     484}
     485
     486int ctdb_pulldb_ext_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx,
     487                         struct ctdb_pulldb_ext **out)
     488{
     489        struct ctdb_pulldb_ext *pulldb;
     490
     491        if (buflen < sizeof(struct ctdb_pulldb_ext)) {
     492                return EMSGSIZE;
     493        }
     494
     495        pulldb = talloc_memdup(mem_ctx, buf, sizeof(struct ctdb_pulldb_ext));
    468496        if (pulldb == NULL) {
    469497                return ENOMEM;
     
    771799
    772800        return ret;
     801}
     802
     803int ctdb_rec_buffer_write(struct ctdb_rec_buffer *recbuf, int fd)
     804{
     805        ssize_t n;
     806
     807        n = write(fd, &recbuf->db_id, sizeof(uint32_t));
     808        if (n == -1 || n != sizeof(uint32_t)) {
     809                return (errno != 0 ? errno : EIO);
     810        }
     811        n = write(fd, &recbuf->count, sizeof(uint32_t));
     812        if (n == -1 || n != sizeof(uint32_t)) {
     813                return (errno != 0 ? errno : EIO);
     814        }
     815        n = write(fd, &recbuf->buflen, sizeof(size_t));
     816        if (n == -1 || n != sizeof(size_t)) {
     817                return (errno != 0 ? errno : EIO);
     818        }
     819        n = write(fd, recbuf->buf, recbuf->buflen);
     820        if (n == -1 || n != recbuf->buflen) {
     821                return (errno != 0 ? errno : EIO);
     822        }
     823
     824        return 0;
     825}
     826
     827int ctdb_rec_buffer_read(int fd, TALLOC_CTX *mem_ctx,
     828                         struct ctdb_rec_buffer **out)
     829{
     830        struct ctdb_rec_buffer *recbuf;
     831        ssize_t n;
     832
     833        recbuf = talloc(mem_ctx, struct ctdb_rec_buffer);
     834        if (recbuf == NULL) {
     835                return ENOMEM;
     836        }
     837
     838        n = read(fd, &recbuf->db_id, sizeof(uint32_t));
     839        if (n == -1 || n != sizeof(uint32_t)) {
     840                return (errno != 0 ? errno : EIO);
     841        }
     842        n = read(fd, &recbuf->count, sizeof(uint32_t));
     843        if (n == -1 || n != sizeof(uint32_t)) {
     844                return (errno != 0 ? errno : EIO);
     845        }
     846        n = read(fd, &recbuf->buflen, sizeof(size_t));
     847        if (n == -1 || n != sizeof(size_t)) {
     848                return (errno != 0 ? errno : EIO);
     849        }
     850
     851        recbuf->buf = talloc_size(recbuf, recbuf->buflen);
     852        if (recbuf->buf == NULL) {
     853                return ENOMEM;
     854        }
     855
     856        n = read(fd, recbuf->buf, recbuf->buflen);
     857        if (n == -1 || n != recbuf->buflen) {
     858                return (errno != 0 ? errno : EIO);
     859        }
     860
     861        *out = recbuf;
     862        return 0;
    773863}
    774864
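
ctdb_rec_buffer_write() and ctdb_rec_buffer_read() above serialise a record buffer as three fixed-size header fields followed by the raw record data, so a file written by one can be replayed by the other. A round-trip sketch, assuming recbuf was built with the existing ctdb_rec_buffer_init()/_add() helpers and fd is a seekable descriptor:

    struct ctdb_rec_buffer *copy = NULL;
    int ret;

    ret = ctdb_rec_buffer_write(recbuf, fd);
    if (ret == 0) {
        lseek(fd, 0, SEEK_SET);        /* rewind before reading back */
        ret = ctdb_rec_buffer_read(fd, mem_ctx, &copy);
    }

Note that the header fields are written in host byte order and with the host's sizeof(size_t), so such files are only meaningful to the node that wrote them, which is all the recovery helper needs.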
  • vendor/current/ctdb/server/ctdb_control.c

    r988 r989  
    716716                return ctdb_control_db_transaction_cancel(ctdb, indata);
    717717
     718        case CTDB_CONTROL_DB_PULL:
     719                CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_pulldb_ext));
     720                return ctdb_control_db_pull(ctdb, c, indata, outdata);
     721
     722        case CTDB_CONTROL_DB_PUSH_START:
     723                CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_pulldb_ext));
     724                return ctdb_control_db_push_start(ctdb, indata);
     725
     726        case CTDB_CONTROL_DB_PUSH_CONFIRM:
     727                CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
     728                return ctdb_control_db_push_confirm(ctdb, indata, outdata);
     729
    718730        default:
    719731                DEBUG(DEBUG_CRIT,(__location__ " Unknown CTDB control opcode %u\n", opcode));
  • vendor/current/ctdb/server/ctdb_daemon.c

    r988 r989  
    4444#include "common/common.h"
    4545#include "common/logging.h"
     46#include "common/pidfile.h"
    4647
    4748struct ctdb_client_pid_list {
     
    5354
    5455const char *ctdbd_pidfile = NULL;
     56static struct pidfile_context *ctdbd_pidfile_ctx = NULL;
    5557
    5658static void daemon_incoming_packet(void *, struct ctdb_req_header *);
     
    984986{
    985987        struct sockaddr_un addr;
     988        int ret;
    986989
    987990        ctdb->daemon.sd = socket(AF_UNIX, SOCK_STREAM, 0);
     
    994997        strncpy(addr.sun_path, ctdb->daemon.name, sizeof(addr.sun_path)-1);
    995998
    996         /* First check if an old ctdbd might be running */
    997         if (connect(ctdb->daemon.sd,
    998                     (struct sockaddr *)&addr, sizeof(addr)) == 0) {
    999                 DEBUG(DEBUG_CRIT,
    1000                       ("Something is already listening on ctdb socket '%s'\n",
    1001                        ctdb->daemon.name));
    1002                 goto failed;
    1003         }
    1004 
    1005999        /* Remove any old socket */
    1006         unlink(ctdb->daemon.name);
     1000        ret = unlink(ctdb->daemon.name);
     1001        if (ret == 0) {
     1002                DEBUG(DEBUG_WARNING,
     1003                      ("Removed stale socket %s\n", ctdb->daemon.name));
     1004        } else if (errno != ENOENT) {
     1005                DEBUG(DEBUG_ERR,
     1006                      ("Failed to remove stale socket %s\n", ctdb->daemon.name));
     1007                return -1;
     1008        }
    10071009
    10081010        set_close_on_exec(ctdb->daemon.sd);
     
    11281130static void ctdb_remove_pidfile(void)
    11291131{
    1130         /* Only the main ctdbd's PID matches the SID */
    1131         if (ctdbd_pidfile != NULL && getsid(0) == getpid()) {
    1132                 if (unlink(ctdbd_pidfile) == 0) {
    1133                         DEBUG(DEBUG_NOTICE, ("Removed PID file %s\n",
    1134                                              ctdbd_pidfile));
    1135                 } else {
    1136                         DEBUG(DEBUG_WARNING, ("Failed to Remove PID file %s\n",
    1137                                               ctdbd_pidfile));
    1138                 }
    1139         }
    1140 }
    1141 
    1142 static void ctdb_create_pidfile(pid_t pid)
     1132        TALLOC_FREE(ctdbd_pidfile_ctx);
     1133}
     1134
     1135static void ctdb_create_pidfile(TALLOC_CTX *mem_ctx)
    11431136{
    11441137        if (ctdbd_pidfile != NULL) {
    1145                 FILE *fp;
    1146 
    1147                 fp = fopen(ctdbd_pidfile, "w");
    1148                 if (fp == NULL) {
    1149                         DEBUG(DEBUG_ALERT,
    1150                               ("Failed to open PID file %s\n", ctdbd_pidfile));
     1138                int ret = pidfile_create(mem_ctx, ctdbd_pidfile,
     1139                                         &ctdbd_pidfile_ctx);
     1140                if (ret != 0) {
     1141                        DEBUG(DEBUG_ERR,
     1142                              ("Failed to create PID file %s\n",
     1143                              ctdbd_pidfile));
    11511144                        exit(11);
    11521145                }
    11531146
    1154                 fprintf(fp, "%d\n", pid);
    1155                 fclose(fp);
    11561147                DEBUG(DEBUG_NOTICE, ("Created PID file %s\n", ctdbd_pidfile));
    11571148                atexit(ctdb_remove_pidfile);
     
    12141205        int res, ret = -1;
    12151206        struct tevent_fd *fde;
     1207
     1208        if (do_fork && fork()) {
     1209                return 0;
     1210        }
     1211
     1212        if (do_fork) {
     1213                if (setsid() == -1) {
     1214                        ctdb_die(ctdb, "Failed to setsid()\n");
     1215                }
     1216                close(0);
     1217                if (open("/dev/null", O_RDONLY) != 0) {
     1218                        DEBUG(DEBUG_ALERT,(__location__ " Failed to setup stdin on /dev/null\n"));
     1219                        exit(11);
     1220                }
     1221        }
     1222        ignore_signal(SIGPIPE);
     1223        ignore_signal(SIGUSR1);
     1224
     1225        ctdb->ctdbd_pid = getpid();
     1226        DEBUG(DEBUG_ERR, ("Starting CTDBD (Version %s) as PID: %u\n",
     1227                          CTDB_VERSION_STRING, ctdb->ctdbd_pid));
     1228        ctdb_create_pidfile(ctdb);
    12161229
    12171230        /* create a unix domain stream socket to listen to */
     
    12221235        }
    12231236
    1224         if (do_fork && fork()) {
    1225                 return 0;
    1226         }
    1227 
    1228         tdb_reopen_all(false);
    1229 
    1230         if (do_fork) {
    1231                 if (setsid() == -1) {
    1232                         ctdb_die(ctdb, "Failed to setsid()\n");
    1233                 }
    1234                 close(0);
    1235                 if (open("/dev/null", O_RDONLY) != 0) {
    1236                         DEBUG(DEBUG_ALERT,(__location__ " Failed to setup stdin on /dev/null\n"));
    1237                         exit(11);
    1238                 }
    1239         }
    1240         ignore_signal(SIGPIPE);
    1241         ignore_signal(SIGUSR1);
    1242 
    1243         ctdb->ctdbd_pid = getpid();
    1244         DEBUG(DEBUG_ERR, ("Starting CTDBD (Version %s) as PID: %u\n",
    1245                           CTDB_VERSION_STRING, ctdb->ctdbd_pid));
    1246         ctdb_create_pidfile(ctdb->ctdbd_pid);
    1247 
    12481237        /* Make sure we log something when the daemon terminates.
    12491238         * This must be the first exit handler to run (so the last to
     
    12611250
    12621251        ctdb->ev = tevent_context_init(NULL);
     1252        if (ctdb->ev == NULL) {
     1253                DEBUG(DEBUG_ALERT,("tevent_context_init() failed\n"));
     1254                exit(1);
     1255        }
    12631256        tevent_loop_allow_nesting(ctdb->ev);
    12641257        tevent_set_trace_callback(ctdb->ev, ctdb_tevent_trace, ctdb);
     
    18461839        /* get a new event context */
    18471840        ctdb->ev = tevent_context_init(ctdb);
     1841        if (ctdb->ev == NULL) {
     1842                DEBUG(DEBUG_ALERT,("tevent_context_init() failed\n"));
     1843                exit(1);
     1844        }
    18481845        tevent_loop_allow_nesting(ctdb->ev);
    18491846
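
The pidfile changes above replace the hand-rolled fopen()/fprintf()/unlink() sequence with the common pidfile API: pidfile_create() hands back a talloc context whose destructor removes the file, so cleanup collapses to a TALLOC_FREE(). A condensed sketch of that lifecycle, with an illustrative path (the API is used exactly as in the hunk above):

    static struct pidfile_context *pid_ctx = NULL;

    static void remove_pidfile(void)
    {
        TALLOC_FREE(pid_ctx);          /* destructor unlinks the file */
    }

    static void create_pidfile(TALLOC_CTX *mem_ctx, const char *path)
    {
        if (pidfile_create(mem_ctx, path, &pid_ctx) != 0) {
            exit(1);                   /* another daemon holds the lock */
        }
        atexit(remove_pidfile);
    }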
  • vendor/current/ctdb/server/ctdb_logging.c

    r988 r989  
    282282        /* We'll fail if stderr/stdout not already open; it's simpler. */
    283283        old_stdout = dup(STDOUT_FILENO);
     284        if (old_stdout < 0) {
     285                DEBUG(DEBUG_ERR, ("Failed to dup stdout for child logging\n"));
     286                return -1;
     287        }
    284288        old_stderr = dup(STDERR_FILENO);
    285         if (old_stdout < 0 || old_stderr < 0) {
    286                 DEBUG(DEBUG_ERR, ("Failed to dup stdout/stderr for child logging\n"));
     289        if (old_stderr < 0) {
     290                DEBUG(DEBUG_ERR, ("Failed to dup stderr for child logging\n"));
     291                close(old_stdout);
    287292                return -1;
    288293        }
  • vendor/current/ctdb/server/ctdb_recover.c

    r988 r989  
    314314}
    315315
     316struct db_pull_state {
     317        struct ctdb_context *ctdb;
     318        struct ctdb_db_context *ctdb_db;
     319        struct ctdb_marshall_buffer *recs;
     320        uint32_t pnn;
     321        uint64_t srvid;
     322        uint32_t num_records;
     323};
     324
     325static int traverse_db_pull(struct tdb_context *tdb, TDB_DATA key,
     326                            TDB_DATA data, void *private_data)
     327{
     328        struct db_pull_state *state = (struct db_pull_state *)private_data;
     329        struct ctdb_marshall_buffer *recs;
     330
     331        recs = ctdb_marshall_add(state->ctdb, state->recs,
     332                                 state->ctdb_db->db_id, 0, key, NULL, data);
     333        if (recs == NULL) {
     334                TALLOC_FREE(state->recs);
     335                return -1;
     336        }
     337        state->recs = recs;
     338
     339        if (talloc_get_size(state->recs) >=
     340                        state->ctdb->tunable.rec_buffer_size_limit) {
     341                TDB_DATA buffer;
     342                int ret;
     343
     344                buffer = ctdb_marshall_finish(state->recs);
     345                ret = ctdb_daemon_send_message(state->ctdb, state->pnn,
     346                                               state->srvid, buffer);
     347                if (ret != 0) {
     348                        TALLOC_FREE(state->recs);
     349                        return -1;
     350                }
     351
     352                state->num_records += state->recs->count;
     353                TALLOC_FREE(state->recs);
     354        }
     355
     356        return 0;
     357}
     358
     359int32_t ctdb_control_db_pull(struct ctdb_context *ctdb,
     360                             struct ctdb_req_control_old *c,
     361                             TDB_DATA indata, TDB_DATA *outdata)
     362{
     363        struct ctdb_pulldb_ext *pulldb_ext;
     364        struct ctdb_db_context *ctdb_db;
     365        struct db_pull_state state;
     366        int ret;
     367
     368        pulldb_ext = (struct ctdb_pulldb_ext *)indata.dptr;
     369
     370        ctdb_db = find_ctdb_db(ctdb, pulldb_ext->db_id);
     371        if (ctdb_db == NULL) {
     372                DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%08x\n",
     373                                 pulldb_ext->db_id));
     374                return -1;
     375        }
     376
     377        if (!ctdb_db_frozen(ctdb_db)) {
     378                DEBUG(DEBUG_ERR,
      379                      ("rejecting ctdb_control_db_pull when not frozen\n"));
     380                return -1;
     381        }
     382
     383        if (ctdb_db->unhealthy_reason) {
     384                /* this is just a warning, as the tdb should be empty anyway */
     385                DEBUG(DEBUG_WARNING,
      386                      ("db(%s) unhealthy in ctdb_control_db_pull: %s\n",
     387                       ctdb_db->db_name, ctdb_db->unhealthy_reason));
     388        }
     389
     390        state.ctdb = ctdb;
     391        state.ctdb_db = ctdb_db;
     392        state.recs = NULL;
     393        state.pnn = c->hdr.srcnode;
     394        state.srvid = pulldb_ext->srvid;
     395        state.num_records = 0;
     396
     397        if (ctdb_lockdb_mark(ctdb_db) != 0) {
     398                DEBUG(DEBUG_ERR,
     399                      (__location__ " Failed to get lock on entire db - failing\n"));
     400                return -1;
     401        }
     402
     403        ret = tdb_traverse_read(ctdb_db->ltdb->tdb, traverse_db_pull, &state);
     404        if (ret == -1) {
     405                DEBUG(DEBUG_ERR,
      406                      (__location__ " Failed to traverse db '%s'\n",
     407                       ctdb_db->db_name));
     408                ctdb_lockdb_unmark(ctdb_db);
     409                return -1;
     410        }
     411
     412        /* Last few records */
     413        if (state.recs != NULL) {
     414                TDB_DATA buffer;
     415
     416                buffer = ctdb_marshall_finish(state.recs);
     417                ret = ctdb_daemon_send_message(state.ctdb, state.pnn,
     418                                               state.srvid, buffer);
     419                if (ret != 0) {
     420                        TALLOC_FREE(state.recs);
     421                        ctdb_lockdb_unmark(ctdb_db);
     422                        return -1;
     423                }
     424
     425                state.num_records += state.recs->count;
     426                TALLOC_FREE(state.recs);
     427        }
     428
     429        ctdb_lockdb_unmark(ctdb_db);
     430
     431        outdata->dptr = talloc_size(outdata, sizeof(uint32_t));
     432        if (outdata->dptr == NULL) {
     433                DEBUG(DEBUG_ERR, (__location__ " Memory allocation error\n"));
     434                return -1;
     435        }
     436
     437        memcpy(outdata->dptr, (uint8_t *)&state.num_records, sizeof(uint32_t));
     438        outdata->dsize = sizeof(uint32_t);
     439
     440        return 0;
     441}
     442
    316443/*
    317444  push a bunch of records into a ltdb, filtering by rsn
     
    406533        ctdb_lockdb_unmark(ctdb_db);
    407534        return -1;
     535}
     536
     537struct db_push_state {
     538        struct ctdb_context *ctdb;
     539        struct ctdb_db_context *ctdb_db;
     540        uint64_t srvid;
     541        uint32_t num_records;
     542        bool failed;
     543};
     544
     545static void db_push_msg_handler(uint64_t srvid, TDB_DATA indata,
     546                                void *private_data)
     547{
     548        struct db_push_state *state = talloc_get_type(
     549                private_data, struct db_push_state);
     550        struct ctdb_marshall_buffer *recs;
     551        struct ctdb_rec_data_old *rec;
     552        int i, ret;
     553
     554        if (state->failed) {
     555                return;
     556        }
     557
     558        recs = (struct ctdb_marshall_buffer *)indata.dptr;
     559        rec = (struct ctdb_rec_data_old *)&recs->data[0];
     560
     561        DEBUG(DEBUG_INFO, ("starting push of %u records for dbid 0x%x\n",
     562                           recs->count, recs->db_id));
     563
     564        for (i=0; i<recs->count; i++) {
     565                TDB_DATA key, data;
     566                struct ctdb_ltdb_header *hdr;
     567
     568                key.dptr = &rec->data[0];
     569                key.dsize = rec->keylen;
     570                data.dptr = &rec->data[key.dsize];
     571                data.dsize = rec->datalen;
     572
     573                if (data.dsize < sizeof(struct ctdb_ltdb_header)) {
     574                        DEBUG(DEBUG_CRIT,(__location__ " bad ltdb record\n"));
     575                        goto failed;
     576                }
     577
     578                hdr = (struct ctdb_ltdb_header *)data.dptr;
      579                /* Strip off any read-only record flags.
      580                 * All read-only records are revoked implicitly by a recovery.
     581                 */
     582                hdr->flags &= ~CTDB_REC_RO_FLAGS;
     583
     584                data.dptr += sizeof(*hdr);
     585                data.dsize -= sizeof(*hdr);
     586
     587                ret = ctdb_ltdb_store(state->ctdb_db, key, hdr, data);
     588                if (ret != 0) {
     589                        DEBUG(DEBUG_ERR,
     590                              (__location__ " Unable to store record\n"));
     591                        goto failed;
     592                }
     593
     594                rec = (struct ctdb_rec_data_old *)(rec->length + (uint8_t *)rec);
     595        }
     596
     597        DEBUG(DEBUG_DEBUG, ("finished push of %u records for dbid 0x%x\n",
     598                            recs->count, recs->db_id));
     599
     600        state->num_records += recs->count;
     601        return;
     602
     603failed:
     604        state->failed = true;
     605}
     606
     607int32_t ctdb_control_db_push_start(struct ctdb_context *ctdb, TDB_DATA indata)
     608{
     609        struct ctdb_pulldb_ext *pulldb_ext;
     610        struct ctdb_db_context *ctdb_db;
     611        struct db_push_state *state;
     612        int ret;
     613
     614        pulldb_ext = (struct ctdb_pulldb_ext *)indata.dptr;
     615
     616        ctdb_db = find_ctdb_db(ctdb, pulldb_ext->db_id);
     617        if (ctdb_db == NULL) {
     618                DEBUG(DEBUG_ERR,
     619                      (__location__ " Unknown db 0x%08x\n", pulldb_ext->db_id));
     620                return -1;
     621        }
     622
     623        if (!ctdb_db_frozen(ctdb_db)) {
     624                DEBUG(DEBUG_ERR,
     625                      ("rejecting ctdb_control_db_push_start when not frozen\n"));
     626                return -1;
     627        }
     628
     629        if (ctdb_db->push_started) {
     630                DEBUG(DEBUG_WARNING,
     631                      (__location__ " DB push already started for %s\n",
     632                       ctdb_db->db_name));
     633
     634                /* De-register old state */
     635                state = (struct db_push_state *)ctdb_db->push_state;
     636                if (state != NULL) {
     637                        srvid_deregister(ctdb->srv, state->srvid, state);
     638                        talloc_free(state);
     639                        ctdb_db->push_state = NULL;
     640                }
     641        }
     642
     643        state = talloc_zero(ctdb_db, struct db_push_state);
     644        if (state == NULL) {
     645                DEBUG(DEBUG_ERR, (__location__ " Memory allocation error\n"));
     646                return -1;
     647        }
     648
     649        state->ctdb = ctdb;
     650        state->ctdb_db = ctdb_db;
     651        state->srvid = pulldb_ext->srvid;
     652        state->failed = false;
     653
     654        ret = srvid_register(ctdb->srv, state, state->srvid,
     655                             db_push_msg_handler, state);
     656        if (ret != 0) {
     657                DEBUG(DEBUG_ERR,
     658                      (__location__ " Failed to register srvid for db push\n"));
     659                talloc_free(state);
     660                return -1;
     661        }
     662
     663        if (ctdb_lockdb_mark(ctdb_db) != 0) {
     664                DEBUG(DEBUG_ERR,
     665                      (__location__ " Failed to get lock on entire db - failing\n"));
     666                srvid_deregister(ctdb->srv, state->srvid, state);
     667                talloc_free(state);
     668                return -1;
     669        }
     670
     671        ctdb_db->push_started = true;
     672        ctdb_db->push_state = state;
     673
     674        return 0;
     675}
     676
     677int32_t ctdb_control_db_push_confirm(struct ctdb_context *ctdb,
     678                                     TDB_DATA indata, TDB_DATA *outdata)
     679{
     680        uint32_t db_id;
     681        struct ctdb_db_context *ctdb_db;
     682        struct db_push_state *state;
     683
     684        db_id = *(uint32_t *)indata.dptr;
     685
     686        ctdb_db = find_ctdb_db(ctdb, db_id);
     687        if (ctdb_db == NULL) {
     688                DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%08x\n", db_id));
     689                return -1;
     690        }
     691
     692        if (!ctdb_db_frozen(ctdb_db)) {
     693                DEBUG(DEBUG_ERR,
     694                      ("rejecting ctdb_control_db_push_confirm when not frozen\n"));
     695                return -1;
     696        }
     697
     698        if (!ctdb_db->push_started) {
     699                DEBUG(DEBUG_ERR, (__location__ " DB push not started\n"));
     700                return -1;
     701        }
     702
     703        if (ctdb_db->readonly) {
     704                DEBUG(DEBUG_ERR,
     705                      ("Clearing the tracking database for dbid 0x%x\n",
     706                       ctdb_db->db_id));
     707                if (tdb_wipe_all(ctdb_db->rottdb) != 0) {
     708                        DEBUG(DEBUG_ERR,
     709                              ("Failed to wipe tracking database for 0x%x."
     710                               " Dropping read-only delegation support\n",
     711                               ctdb_db->db_id));
     712                        ctdb_db->readonly = false;
     713                        tdb_close(ctdb_db->rottdb);
     714                        ctdb_db->rottdb = NULL;
     715                        ctdb_db->readonly = false;
     716                }
     717
     718                while (ctdb_db->revokechild_active != NULL) {
     719                        talloc_free(ctdb_db->revokechild_active);
     720                }
     721        }
     722
     723        ctdb_lockdb_unmark(ctdb_db);
     724
     725        state = (struct db_push_state *)ctdb_db->push_state;
     726        if (state == NULL) {
     727                DEBUG(DEBUG_ERR, (__location__ " Missing push db state\n"));
     728                return -1;
     729        }
     730
     731        srvid_deregister(ctdb->srv, state->srvid, state);
     732
     733        outdata->dptr = talloc_size(outdata, sizeof(uint32_t));
     734        if (outdata->dptr == NULL) {
     735                DEBUG(DEBUG_ERR, (__location__ " Memory allocation error\n"));
     736                talloc_free(state);
     737                ctdb_db->push_state = NULL;
     738                return -1;
     739        }
     740
     741        memcpy(outdata->dptr, (uint8_t *)&state->num_records, sizeof(uint32_t));
     742        outdata->dsize = sizeof(uint32_t);
     743
     744        talloc_free(state);
     745        ctdb_db->push_started = false;
     746        ctdb_db->push_state = NULL;
     747
     748        return 0;
    408749}
    409750
     
    10331374                if (data.dsize < sizeof(struct ctdb_ltdb_header)) {
    10341375                        DEBUG(DEBUG_CRIT,(__location__ " bad ltdb record in indata\n"));
     1376                        talloc_free(records);
    10351377                        return -1;
    10361378                }
     
    12251567                        DEBUG(DEBUG_CRIT, (__location__ " bad ltdb record "
    12261568                                           "in indata\n"));
     1569                        talloc_free(records);
    12271570                        return -1;
    12281571                }
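
Taken together, DB_PULL, DB_PUSH_START and DB_PUSH_CONFIRM move bulk record transfer off the control channel: the initiator chooses a srvid, record buffers are streamed to that srvid as ordinary messages in rec_buffer_size_limit-sized chunks, and the control replies carry only record counts for cross-checking. A hedged sketch of the push handshake from the initiating node, using the synchronous wrappers this changeset adds (the record-streaming loop between the two controls is elided):

    struct ctdb_pulldb_ext pulldb_ext = {
        .db_id   = db_id,
        .lmaster = CTDB_LMASTER_ANY,
        .srvid   = srvid,              /* chosen by the initiator */
    };
    uint32_t stored = 0;
    int ret;

    ret = ctdb_ctrl_db_push_start(mem_ctx, ev, client, destnode,
                                  timeval_current_ofs(30, 0), &pulldb_ext);
    if (ret == 0) {
        /* ... stream marshalled record buffers to srvid here ... */
        ret = ctdb_ctrl_db_push_confirm(mem_ctx, ev, client, destnode,
                                        timeval_current_ofs(30, 0),
                                        pulldb_ext.db_id, &stored);
        /* compare stored against the number of records sent */
    }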
  • vendor/current/ctdb/server/ctdb_recoverd.c

    r988 r989  
    253253        uint32_t *force_rebalance_nodes;
    254254        struct ctdb_node_capabilities *caps;
     255        bool frozen_on_inactive;
    255256};
    256257
     
    17821783        }
    17831784
     1785        setenv("CTDB_DBDIR_STATE", rec->ctdb->db_directory_state, 1);
     1786
    17841787        if (!ctdb_vfork_with_logging(state, rec->ctdb, "recovery", prog, nargs,
    17851788                                     args, NULL, NULL, &state->pid)) {
     
    19811984
    19821985        DEBUG(DEBUG_NOTICE, (__location__ " Recovery - disabled recovery mode\n"));
     1986
     1987        /* execute the "recovered" event script on all nodes */
     1988        ret = run_recovered_eventscript(rec, nodemap, "do_recovery");
     1989        if (ret!=0) {
     1990                DEBUG(DEBUG_ERR, (__location__ " Unable to run the 'recovered' event on cluster. Recovery process failed.\n"));
     1991                return -1;
     1992        }
     1993
     1994        DEBUG(DEBUG_NOTICE, (__location__ " Recovery - finished the recovered event\n"));
    19831995
    19841996        return 0;
     
    21572169        do_takeover_run(rec, nodemap, false);
    21582170
    2159         /* execute the "recovered" event script on all nodes */
    2160         ret = run_recovered_eventscript(rec, nodemap, "do_recovery");
    2161         if (ret!=0) {
    2162                 DEBUG(DEBUG_ERR, (__location__ " Unable to run the 'recovered' event on cluster. Recovery process failed.\n"));
    2163                 goto fail;
    2164         }
    2165 
    2166         DEBUG(DEBUG_NOTICE, (__location__ " Recovery - finished the recovered event\n"));
    2167 
    21682171        /* send a message to all clients telling them that the cluster
    21692172           has been reconfigured */
     
    26592662}
    26602663
     2664/*
     2665 * handler for assigning banning credits
     2666 */
     2667static void banning_handler(uint64_t srvid, TDB_DATA data, void *private_data)
     2668{
     2669        struct ctdb_recoverd *rec = talloc_get_type(
     2670                private_data, struct ctdb_recoverd);
     2671        uint32_t ban_pnn;
     2672
     2673        /* Ignore if we are not recmaster */
     2674        if (rec->ctdb->pnn != rec->recmaster) {
     2675                return;
     2676        }
     2677
     2678        if (data.dsize != sizeof(uint32_t)) {
      2679                DEBUG(DEBUG_ERR, (__location__ " invalid data size %zu\n",
     2680                                  data.dsize));
     2681                return;
     2682        }
     2683
     2684        ban_pnn = *(uint32_t *)data.dptr;
     2685
     2686        ctdb_set_culprit_count(rec, ban_pnn, rec->nodemap->num);
     2687}
    26612688
    26622689/*
     
    34903517                                return;
    34913518                        }
    3492                         ret = ctdb_ctrl_freeze(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE);
     3519                }
     3520                if (! rec->frozen_on_inactive) {
     3521                        ret = ctdb_ctrl_freeze(ctdb, CONTROL_TIMEOUT(),
     3522                                               CTDB_CURRENT_NODE);
    34933523                        if (ret != 0) {
    3494                                 DEBUG(DEBUG_ERR,(__location__ " Failed to freeze node in STOPPED or BANNED state\n"));
     3524                                DEBUG(DEBUG_ERR,
     3525                                      (__location__ " Failed to freeze node "
     3526                                       "in STOPPED or BANNED state\n"));
    34953527                                return;
    34963528                        }
     3529
     3530                        rec->frozen_on_inactive = true;
    34973531                }
    34983532
     
    35033537                return;
    35043538        }
     3539
     3540        rec->frozen_on_inactive = false;
    35053541
    35063542        /* If we are not the recmaster then do some housekeeping */
     
    38833919
    38843920        rec->priority_time = timeval_current();
     3921        rec->frozen_on_inactive = false;
    38853922
    38863923        /* register a message port for sending memory dumps */
    38873924        ctdb_client_set_message_handler(ctdb, CTDB_SRVID_MEM_DUMP, mem_dump_handler, rec);
     3925
     3926        /* when a node is assigned banning credits */
     3927        ctdb_client_set_message_handler(ctdb, CTDB_SRVID_BANNING,
     3928                                        banning_handler, rec);
    38883929
    38893930        /* register a message port for recovery elections */
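
banning_handler() above is the receiving side; the sender (elsewhere in the recovery daemon) wraps the culprit's PNN in a TDB_DATA blob and messages the recovery master. A minimal sketch with the old-style client API, assuming ctdb, rec and culprit are in scope:

    uint32_t pnn = culprit;
    TDB_DATA data;
    int ret;

    data.dptr = (uint8_t *)&pnn;
    data.dsize = sizeof(pnn);          /* the handler checks this size */

    ret = ctdb_client_send_message(ctdb, rec->recmaster,
                                   CTDB_SRVID_BANNING, data);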
  • vendor/current/ctdb/server/ctdb_recovery_helper.c

    r988 r989  
    3535#include "client/client.h"
    3636
    37 #define TIMEOUT()       timeval_current_ofs(10, 0)
     37static int recover_timeout = 30;
     38
     39#define NUM_RETRIES     3
     40
     41#define TIMEOUT()       timeval_current_ofs(recover_timeout, 0)
    3842
    3943static void LOG(const char *fmt, ...)
     
    6468}
    6569
     70static bool generic_recv(struct tevent_req *req, int *perr)
     71{
     72        int err;
     73
     74        if (tevent_req_is_unix_error(req, &err)) {
     75                if (perr != NULL) {
     76                        *perr = err;
     77                }
     78                return false;
     79        }
     80
     81        return true;
     82}
     83
     84static uint64_t rec_srvid = CTDB_SRVID_RECOVERY;
     85
     86static uint64_t srvid_next(void)
     87{
     88        rec_srvid += 1;
     89        return rec_srvid;
     90}
     91
    6692/*
    6793 * Recovery database functions
     
    81107                                          uint32_t hash_size, bool persistent)
    82108{
     109        static char *db_dir_state = NULL;
    83110        struct recdb_context *recdb;
    84111        unsigned int tdb_flags;
     
    89116        }
    90117
     118        if (db_dir_state == NULL) {
     119                db_dir_state = getenv("CTDB_DBDIR_STATE");
     120        }
     121
    91122        recdb->db_name = db_name;
    92123        recdb->db_id = db_id;
    93124        recdb->db_path = talloc_asprintf(recdb, "%s/recdb.%s",
    94                                          dirname(discard_const(db_path)),
     125                                         db_dir_state != NULL ?
     126                                            db_dir_state :
     127                                            dirname(discard_const(db_path)),
    95128                                         db_name);
    96129        if (recdb->db_path == NULL) {
     
    113146}
    114147
     148static uint32_t recdb_id(struct recdb_context *recdb)
     149{
     150        return recdb->db_id;
     151}
     152
    115153static const char *recdb_name(struct recdb_context *recdb)
    116154{
    117155        return recdb->db_name;
     156}
     157
     158static const char *recdb_path(struct recdb_context *recdb)
     159{
     160        return recdb->db_path;
     161}
     162
     163static struct tdb_context *recdb_tdb(struct recdb_context *recdb)
     164{
     165        return recdb->db->tdb;
     166}
     167
     168static bool recdb_persistent(struct recdb_context *recdb)
     169{
     170        return recdb->persistent;
    118171}
    119172
     
    141194
    142195        /* fetch the existing record, if any */
    143         prev_data = tdb_fetch(state->recdb->db->tdb, key);
     196        prev_data = tdb_fetch(recdb_tdb(state->recdb), key);
    144197
    145198        if (prev_data.dptr != NULL) {
     
    155208        }
    156209
    157         ret = tdb_store(state->recdb->db->tdb, key, data, TDB_REPLACE);
     210        ret = tdb_store(recdb_tdb(state->recdb), key, data, TDB_REPLACE);
    158211        if (ret != 0) {
    159212                return -1;
     
    179232}
    180233
    181 struct recdb_traverse_state {
    182         struct ctdb_rec_buffer *recbuf;
    183         uint32_t pnn;
    184         uint32_t reqid;
    185         bool persistent;
    186         bool failed;
    187 };
    188 
    189 static int recdb_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data,
    190                           void *private_data)
    191 {
    192         struct recdb_traverse_state *state =
    193                 (struct recdb_traverse_state *)private_data;
     234/* This function decides which records from recdb are retained */
     235static int recbuf_filter_add(struct ctdb_rec_buffer *recbuf, bool persistent,
     236                             uint32_t reqid, uint32_t dmaster,
     237                             TDB_DATA key, TDB_DATA data)
     238{
    194239        struct ctdb_ltdb_header *header;
    195240        int ret;
     
    225270         * data structures built from the various tdb-level records.
    226271         */
    227         if (!state->persistent &&
    228             data.dsize <= sizeof(struct ctdb_ltdb_header)) {
     272        if (!persistent && data.dsize <= sizeof(struct ctdb_ltdb_header)) {
    229273                return 0;
    230274        }
     
    232276        /* update the dmaster field to point to us */
    233277        header = (struct ctdb_ltdb_header *)data.dptr;
    234         if (!state->persistent) {
    235                 header->dmaster = state->pnn;
     278        if (!persistent) {
     279                header->dmaster = dmaster;
    236280                header->flags |= CTDB_REC_FLAG_MIGRATED_WITH_DATA;
    237281        }
    238282
    239         ret = ctdb_rec_buffer_add(state->recbuf, state->recbuf, state->reqid,
    240                                   NULL, key, data);
     283        ret = ctdb_rec_buffer_add(recbuf, recbuf, reqid, NULL, key, data);
     284        if (ret != 0) {
     285                return ret;
     286        }
     287
     288        return 0;
     289}
     290
     291struct recdb_records_traverse_state {
     292        struct ctdb_rec_buffer *recbuf;
     293        uint32_t dmaster;
     294        uint32_t reqid;
     295        bool persistent;
     296        bool failed;
     297};
     298
     299static int recdb_records_traverse(struct tdb_context *tdb,
     300                                  TDB_DATA key, TDB_DATA data,
     301                                  void *private_data)
     302{
     303        struct recdb_records_traverse_state *state =
     304                (struct recdb_records_traverse_state *)private_data;
     305        int ret;
     306
     307        ret = recbuf_filter_add(state->recbuf, state->persistent,
     308                                state->reqid, state->dmaster, key, data);
    241309        if (ret != 0) {
    242310                state->failed = true;
     
    248316
    249317static struct ctdb_rec_buffer *recdb_records(struct recdb_context *recdb,
    250                                              TALLOC_CTX *mem_ctx, uint32_t pnn)
    251 {
    252         struct recdb_traverse_state state;
    253         int ret;
    254 
    255         state.recbuf = ctdb_rec_buffer_init(mem_ctx, recdb->db_id);
     318                                             TALLOC_CTX *mem_ctx,
     319                                             uint32_t dmaster)
     320{
     321        struct recdb_records_traverse_state state;
     322        int ret;
     323
     324        state.recbuf = ctdb_rec_buffer_init(mem_ctx, recdb_id(recdb));
    256325        if (state.recbuf == NULL) {
    257326                return NULL;
    258327        }
    259         state.pnn = pnn;
     328        state.dmaster = dmaster;
    260329        state.reqid = 0;
    261         state.persistent = recdb->persistent;
     330        state.persistent = recdb_persistent(recdb);
    262331        state.failed = false;
    263332
    264         ret = tdb_traverse_read(recdb->db->tdb, recdb_traverse, &state);
     333        ret = tdb_traverse_read(recdb_tdb(recdb), recdb_records_traverse,
     334                                &state);
     335        if (ret == -1 || state.failed) {
     336                LOG("Failed to marshall recovery records for %s\n",
     337                    recdb_name(recdb));
     338                TALLOC_FREE(state.recbuf);
     339                return NULL;
     340        }
     341
     342        return state.recbuf;
     343}
     344
     345struct recdb_file_traverse_state {
     346        struct ctdb_rec_buffer *recbuf;
     347        struct recdb_context *recdb;
     348        TALLOC_CTX *mem_ctx;
     349        uint32_t dmaster;
     350        uint32_t reqid;
     351        bool persistent;
     352        bool failed;
     353        int fd;
     354        int max_size;
     355        int num_buffers;
     356};
     357
     358static int recdb_file_traverse(struct tdb_context *tdb,
     359                               TDB_DATA key, TDB_DATA data,
     360                               void *private_data)
     361{
     362        struct recdb_file_traverse_state *state =
     363                (struct recdb_file_traverse_state *)private_data;
     364        int ret;
     365
     366        ret = recbuf_filter_add(state->recbuf, state->persistent,
     367                                state->reqid, state->dmaster, key, data);
     368        if (ret != 0) {
     369                state->failed = true;
     370                return ret;
     371        }
     372
     373        if (ctdb_rec_buffer_len(state->recbuf) > state->max_size) {
     374                ret = ctdb_rec_buffer_write(state->recbuf, state->fd);
     375                if (ret != 0) {
     376                        LOG("Failed to collect recovery records for %s\n",
     377                            recdb_name(state->recdb));
     378                        state->failed = true;
     379                        return ret;
     380                }
     381
     382                state->num_buffers += 1;
     383
     384                TALLOC_FREE(state->recbuf);
     385                state->recbuf = ctdb_rec_buffer_init(state->mem_ctx,
     386                                                     recdb_id(state->recdb));
     387                if (state->recbuf == NULL) {
     388                        state->failed = true;
     389                        return ENOMEM;
     390                }
     391        }
     392
     393        return 0;
     394}
     395
     396static int recdb_file(struct recdb_context *recdb, TALLOC_CTX *mem_ctx,
     397                      uint32_t dmaster, int fd, int max_size)
     398{
     399        struct recdb_file_traverse_state state;
     400        int ret;
     401
     402        state.recbuf = ctdb_rec_buffer_init(mem_ctx, recdb_id(recdb));
     403        if (state.recbuf == NULL) {
     404                return -1;
     405        }
     406        state.recdb = recdb;
     407        state.mem_ctx = mem_ctx;
     408        state.dmaster = dmaster;
     409        state.reqid = 0;
     410        state.persistent = recdb_persistent(recdb);
     411        state.failed = false;
     412        state.fd = fd;
     413        state.max_size = max_size;
     414        state.num_buffers = 0;
     415
     416        ret = tdb_traverse_read(recdb_tdb(recdb), recdb_file_traverse, &state);
    265417        if (ret == -1 || state.failed) {
    266418                TALLOC_FREE(state.recbuf);
     419                return -1;
     420        }
     421
     422        ret = ctdb_rec_buffer_write(state.recbuf, fd);
     423        if (ret != 0) {
     424                LOG("Failed to collect recovery records for %s\n",
     425                    recdb_name(recdb));
     426                TALLOC_FREE(state.recbuf);
     427                return -1;
     428        }
     429        state.num_buffers += 1;
     430
     431        LOG("Wrote %d buffers of recovery records for %s\n",
     432            state.num_buffers, recdb_name(recdb));
     433
     434        return state.num_buffers;
     435}
     436
     437/*
     438 * Pull database from a single node
     439 */
     440
     441struct pull_database_state {
     442        struct tevent_context *ev;
     443        struct ctdb_client_context *client;
     444        struct recdb_context *recdb;
     445        uint32_t pnn;
     446        uint64_t srvid;
     447        int num_records;
     448};
     449
     450static void pull_database_handler(uint64_t srvid, TDB_DATA data,
     451                                  void *private_data);
     452static void pull_database_register_done(struct tevent_req *subreq);
     453static void pull_database_old_done(struct tevent_req *subreq);
     454static void pull_database_unregister_done(struct tevent_req *subreq);
     455static void pull_database_new_done(struct tevent_req *subreq);
     456
     457static struct tevent_req *pull_database_send(
     458                        TALLOC_CTX *mem_ctx,
     459                        struct tevent_context *ev,
     460                        struct ctdb_client_context *client,
     461                        uint32_t pnn, uint32_t caps,
     462                        struct recdb_context *recdb)
     463{
     464        struct tevent_req *req, *subreq;
     465        struct pull_database_state *state;
     466        struct ctdb_req_control request;
     467
     468        req = tevent_req_create(mem_ctx, &state, struct pull_database_state);
     469        if (req == NULL) {
    267470                return NULL;
    268471        }
    269472
    270         return state.recbuf;
     473        state->ev = ev;
     474        state->client = client;
     475        state->recdb = recdb;
     476        state->pnn = pnn;
     477        state->srvid = srvid_next();
     478
     479        if (caps & CTDB_CAP_FRAGMENTED_CONTROLS) {
     480                subreq = ctdb_client_set_message_handler_send(
     481                                        state, state->ev, state->client,
     482                                        state->srvid, pull_database_handler,
     483                                        req);
     484                if (tevent_req_nomem(subreq, req)) {
     485                        return tevent_req_post(req, ev);
     486                }
     487
     488                tevent_req_set_callback(subreq, pull_database_register_done,
     489                                        req);
     490
     491        } else {
     492                struct ctdb_pulldb pulldb;
     493
     494                pulldb.db_id = recdb_id(recdb);
     495                pulldb.lmaster = CTDB_LMASTER_ANY;
     496
     497                ctdb_req_control_pull_db(&request, &pulldb);
     498                subreq = ctdb_client_control_send(state, state->ev,
     499                                                  state->client,
     500                                                  pnn, TIMEOUT(),
     501                                                  &request);
     502                if (tevent_req_nomem(subreq, req)) {
     503                        return tevent_req_post(req, ev);
     504                }
     505                tevent_req_set_callback(subreq, pull_database_old_done, req);
     506        }
     507
     508        return req;
     509}
     510
     511static void pull_database_handler(uint64_t srvid, TDB_DATA data,
     512                                  void *private_data)
     513{
     514        struct tevent_req *req = talloc_get_type_abort(
     515                private_data, struct tevent_req);
     516        struct pull_database_state *state = tevent_req_data(
     517                req, struct pull_database_state);
     518        struct ctdb_rec_buffer *recbuf;
     519        int ret;
     520        bool status;
     521
     522        if (srvid != state->srvid) {
     523                return;
     524        }
     525
     526        ret = ctdb_rec_buffer_pull(data.dptr, data.dsize, state, &recbuf);
     527        if (ret != 0) {
     528                LOG("Invalid data received for DB_PULL messages\n");
     529                return;
     530        }
     531
     532        if (recbuf->db_id != recdb_id(state->recdb)) {
     533                talloc_free(recbuf);
     534                LOG("Invalid dbid:%08x for DB_PULL messages for %s\n",
     535                    recbuf->db_id, recdb_name(state->recdb));
     536                return;
     537        }
     538
     539        status = recdb_add(state->recdb, ctdb_client_pnn(state->client),
     540                           recbuf);
     541        if (! status) {
     542                talloc_free(recbuf);
     543                LOG("Failed to add records to recdb for %s\n",
     544                    recdb_name(state->recdb));
     545                return;
     546        }
     547
     548        state->num_records += recbuf->count;
     549        talloc_free(recbuf);
     550}
     551
     552static void pull_database_register_done(struct tevent_req *subreq)
     553{
     554        struct tevent_req *req = tevent_req_callback_data(
     555                subreq, struct tevent_req);
     556        struct pull_database_state *state = tevent_req_data(
     557                req, struct pull_database_state);
     558        struct ctdb_req_control request;
     559        struct ctdb_pulldb_ext pulldb_ext;
     560        int ret;
     561        bool status;
     562
     563        status = ctdb_client_set_message_handler_recv(subreq, &ret);
     564        TALLOC_FREE(subreq);
     565        if (! status) {
     566                LOG("failed to set message handler for DB_PULL for %s\n",
     567                    recdb_name(state->recdb));
     568                tevent_req_error(req, ret);
     569                return;
     570        }
     571
     572        pulldb_ext.db_id = recdb_id(state->recdb);
     573        pulldb_ext.lmaster = CTDB_LMASTER_ANY;
     574        pulldb_ext.srvid = state->srvid;
     575
     576        ctdb_req_control_db_pull(&request, &pulldb_ext);
     577        subreq = ctdb_client_control_send(state, state->ev, state->client,
     578                                          state->pnn, TIMEOUT(), &request);
     579        if (tevent_req_nomem(subreq, req)) {
     580                return;
     581        }
     582        tevent_req_set_callback(subreq, pull_database_new_done, req);
     583}
     584
     585static void pull_database_old_done(struct tevent_req *subreq)
     586{
     587        struct tevent_req *req = tevent_req_callback_data(
     588                subreq, struct tevent_req);
     589        struct pull_database_state *state = tevent_req_data(
     590                req, struct pull_database_state);
     591        struct ctdb_reply_control *reply;
     592        struct ctdb_rec_buffer *recbuf;
     593        int ret;
     594        bool status;
     595
     596        status = ctdb_client_control_recv(subreq, &ret, state, &reply);
     597        TALLOC_FREE(subreq);
     598        if (! status) {
     599                LOG("control PULL_DB failed for %s on node %u, ret=%d\n",
     600                    recdb_name(state->recdb), state->pnn, ret);
     601                tevent_req_error(req, ret);
     602                return;
     603        }
     604
     605        ret = ctdb_reply_control_pull_db(reply, state, &recbuf);
     606        talloc_free(reply);
     607        if (ret != 0) {
     608                tevent_req_error(req, ret);
     609                return;
     610        }
     611
     612        status = recdb_add(state->recdb, ctdb_client_pnn(state->client),
     613                           recbuf);
     614        if (! status) {
     615                talloc_free(recbuf);
     616                tevent_req_error(req, EIO);
     617                return;
     618        }
     619
     620        state->num_records = recbuf->count;
     621        talloc_free(recbuf);
     622
     623        LOG("Pulled %d records for db %s from node %d\n",
     624            state->num_records, recdb_name(state->recdb), state->pnn);
     625
     626        tevent_req_done(req);
     627}
     628
     629static void pull_database_new_done(struct tevent_req *subreq)
     630{
     631        struct tevent_req *req = tevent_req_callback_data(
     632                subreq, struct tevent_req);
     633        struct pull_database_state *state = tevent_req_data(
     634                req, struct pull_database_state);
     635        struct ctdb_reply_control *reply;
     636        uint32_t num_records;
     637        int ret;
     638        bool status;
     639
     640        status = ctdb_client_control_recv(subreq, &ret, state, &reply);
     641        TALLOC_FREE(subreq);
     642        if (! status) {
     643                LOG("control DB_PULL failed for %s on node %u, ret=%d\n",
     644                    recdb_name(state->recdb), state->pnn, ret);
     645                tevent_req_error(req, ret);
     646                return;
     647        }
     648
     649        ret = ctdb_reply_control_db_pull(reply, &num_records);
     650        talloc_free(reply);
     651        if (ret != 0 || num_records != state->num_records) {
     652                LOG("mismatch (%u != %u) in DB_PULL records for %s\n",
     653                    num_records, state->num_records, recdb_name(state->recdb));
     654                tevent_req_error(req, EIO);
     655                return;
     656        }
     657
     658        LOG("Pulled %d records for db %s from node %d\n",
     659            state->num_records, recdb_name(state->recdb), state->pnn);
     660
     661        subreq = ctdb_client_remove_message_handler_send(
     662                                        state, state->ev, state->client,
     663                                        state->srvid, req);
     664        if (tevent_req_nomem(subreq, req)) {
     665                return;
     666        }
     667        tevent_req_set_callback(subreq, pull_database_unregister_done, req);
     668}
     669
     670static void pull_database_unregister_done(struct tevent_req *subreq)
     671{
     672        struct tevent_req *req = tevent_req_callback_data(
     673                subreq, struct tevent_req);
     674        struct pull_database_state *state = tevent_req_data(
     675                req, struct pull_database_state);
     676        int ret;
     677        bool status;
     678
     679        status = ctdb_client_remove_message_handler_recv(subreq, &ret);
     680        TALLOC_FREE(subreq);
     681        if (! status) {
     682                LOG("failed to remove message handler for DB_PULL for %s\n",
     683                    recdb_name(state->recdb));
     684                tevent_req_error(req, ret);
     685                return;
     686        }
     687
     688        tevent_req_done(req);
     689}
     690
     691static bool pull_database_recv(struct tevent_req *req, int *perr)
     692{
     693        return generic_recv(req, perr);
     694}
     695
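
Both pull styles complete through the same tevent_req machinery, so a caller can also drive them synchronously. A minimal sketch, assuming an already-connected client context and a recdb handle; pull_database_send/recv are the static helpers above, and tevent_req_poll() is the stock tevent way to run a request to completion:

    static int pull_database(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
                             struct ctdb_client_context *client,
                             uint32_t pnn, uint32_t caps,
                             struct recdb_context *recdb)
    {
            struct tevent_req *req;
            bool status;
            int ret;

            req = pull_database_send(mem_ctx, ev, client, pnn, caps, recdb);
            if (req == NULL) {
                    return ENOMEM;
            }
            if (! tevent_req_poll(req, ev)) {
                    /* event loop failure, not a protocol error */
                    talloc_free(req);
                    return EIO;
            }
            status = pull_database_recv(req, &ret);
            talloc_free(req);
            return status ? 0 : ret;
    }
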
     696/*
     697 * Push database to specified nodes (old style)
     698 */
     699
     700struct push_database_old_state {
     701        struct tevent_context *ev;
     702        struct ctdb_client_context *client;
     703        struct recdb_context *recdb;
     704        uint32_t *pnn_list;
     705        int count;
     706        struct ctdb_rec_buffer *recbuf;
     707        int index;
     708};
     709
     710static void push_database_old_push_done(struct tevent_req *subreq);
     711
     712static struct tevent_req *push_database_old_send(
     713                        TALLOC_CTX *mem_ctx,
     714                        struct tevent_context *ev,
     715                        struct ctdb_client_context *client,
     716                        uint32_t *pnn_list, int count,
     717                        struct recdb_context *recdb)
     718{
     719        struct tevent_req *req, *subreq;
     720        struct push_database_old_state *state;
     721        struct ctdb_req_control request;
     722        uint32_t pnn;
     723
     724        req = tevent_req_create(mem_ctx, &state,
     725                                struct push_database_old_state);
     726        if (req == NULL) {
     727                return NULL;
     728        }
     729
     730        state->ev = ev;
     731        state->client = client;
     732        state->recdb = recdb;
     733        state->pnn_list = pnn_list;
     734        state->count = count;
     735        state->index = 0;
     736
     737        state->recbuf = recdb_records(recdb, state,
     738                                      ctdb_client_pnn(client));
     739        if (tevent_req_nomem(state->recbuf, req)) {
     740                return tevent_req_post(req, ev);
     741        }
     742
     743        pnn = state->pnn_list[state->index];
     744
     745        ctdb_req_control_push_db(&request, state->recbuf);
     746        subreq = ctdb_client_control_send(state, ev, client, pnn,
     747                                          TIMEOUT(), &request);
     748        if (tevent_req_nomem(subreq, req)) {
     749                return tevent_req_post(req, ev);
     750        }
     751        tevent_req_set_callback(subreq, push_database_old_push_done, req);
     752
     753        return req;
     754}
     755
     756static void push_database_old_push_done(struct tevent_req *subreq)
     757{
     758        struct tevent_req *req = tevent_req_callback_data(
     759                subreq, struct tevent_req);
     760        struct push_database_old_state *state = tevent_req_data(
     761                req, struct push_database_old_state);
     762        struct ctdb_req_control request;
     763        uint32_t pnn;
     764        int ret;
     765        bool status;
     766
     767        status = ctdb_client_control_recv(subreq, &ret, NULL, NULL);
     768        TALLOC_FREE(subreq);
     769        if (! status) {
     770                LOG("control PUSH_DB failed for db %s on node %u, ret=%d\n",
     771                    recdb_name(state->recdb), state->pnn_list[state->index],
     772                    ret);
     773                tevent_req_error(req, ret);
     774                return;
     775        }
     776
     777        state->index += 1;
     778        if (state->index == state->count) {
     779                TALLOC_FREE(state->recbuf);
     780                tevent_req_done(req);
     781                return;
     782        }
     783
     784        pnn = state->pnn_list[state->index];
     785
     786        ctdb_req_control_push_db(&request, state->recbuf);
     787        subreq = ctdb_client_control_send(state, state->ev, state->client,
     788                                          pnn, TIMEOUT(), &request);
     789        if (tevent_req_nomem(subreq, req)) {
     790                return;
     791        }
     792        tevent_req_set_callback(subreq, push_database_old_push_done, req);
     793}
     794
     795static bool push_database_old_recv(struct tevent_req *req, int *perr)
     796{
     797        return generic_recv(req, perr);
     798}
     799
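
push_database_old_send() deliberately pushes to one node at a time, so only a single copy of the serialized record buffer is ever in flight. If latency mattered more than memory, the same payload could be fanned out with the multi-node control helper used elsewhere in this file; a hypothetical variant of the send step (push_database_old_all_done is an assumed callback name):

    /* Hypothetical: broadcast PUSH_DB to all target nodes at once. */
    ctdb_req_control_push_db(&request, state->recbuf);
    subreq = ctdb_client_control_multi_send(state, state->ev, state->client,
                                            state->pnn_list, state->count,
                                            TIMEOUT(), &request);
    if (tevent_req_nomem(subreq, req)) {
            return;
    }
    tevent_req_set_callback(subreq, push_database_old_all_done, req);
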
     800/*
     801 * Push database to specified nodes (new style)
     802 */
     803
     804struct push_database_new_state {
     805        struct tevent_context *ev;
     806        struct ctdb_client_context *client;
     807        struct recdb_context *recdb;
     808        uint32_t *pnn_list;
     809        int count;
     810        uint64_t srvid;
     811        uint32_t dmaster;
     812        int fd;
     813        int num_buffers;
     814        int num_buffers_sent;
     815        int num_records;
     816};
     817
     818static void push_database_new_started(struct tevent_req *subreq);
     819static void push_database_new_send_msg(struct tevent_req *req);
     820static void push_database_new_send_done(struct tevent_req *subreq);
     821static void push_database_new_confirmed(struct tevent_req *subreq);
     822
     823static struct tevent_req *push_database_new_send(
     824                        TALLOC_CTX *mem_ctx,
     825                        struct tevent_context *ev,
     826                        struct ctdb_client_context *client,
     827                        uint32_t *pnn_list, int count,
     828                        struct recdb_context *recdb,
     829                        int max_size)
     830{
     831        struct tevent_req *req, *subreq;
     832        struct push_database_new_state *state;
     833        struct ctdb_req_control request;
     834        struct ctdb_pulldb_ext pulldb_ext;
     835        char *filename;
     836        off_t offset;
     837
     838        req = tevent_req_create(mem_ctx, &state,
     839                                struct push_database_new_state);
     840        if (req == NULL) {
     841                return NULL;
     842        }
     843
     844        state->ev = ev;
     845        state->client = client;
     846        state->recdb = recdb;
     847        state->pnn_list = pnn_list;
     848        state->count = count;
     849
     850        state->srvid = srvid_next();
     851        state->dmaster = ctdb_client_pnn(client);
     852        state->num_buffers_sent = 0;
     853        state->num_records = 0;
     854
     855        filename = talloc_asprintf(state, "%s.dat", recdb_path(recdb));
     856        if (tevent_req_nomem(filename, req)) {
     857                return tevent_req_post(req, ev);
     858        }
     859
     860        state->fd = open(filename, O_RDWR|O_CREAT, 0644);
     861        if (state->fd == -1) {
     862                tevent_req_error(req, errno);
     863                return tevent_req_post(req, ev);
     864        }
     865        unlink(filename);
     866        talloc_free(filename);
     867
     868        state->num_buffers = recdb_file(recdb, state, state->dmaster,
     869                                        state->fd, max_size);
     870        if (state->num_buffers == -1) {
     871                tevent_req_error(req, ENOMEM);
     872                return tevent_req_post(req, ev);
     873        }
     874
     875        offset = lseek(state->fd, 0, SEEK_SET);
     876        if (offset != 0) {
     877                tevent_req_error(req, EIO);
     878                return tevent_req_post(req, ev);
     879        }
     880
     881        pulldb_ext.db_id = recdb_id(recdb);
     882        pulldb_ext.srvid = state->srvid;
     883
     884        ctdb_req_control_db_push_start(&request, &pulldb_ext);
     885        subreq = ctdb_client_control_multi_send(state, ev, client,
     886                                                pnn_list, count,
     887                                                TIMEOUT(), &request);
     888        if (tevent_req_nomem(subreq, req)) {
     889                return tevent_req_post(req, ev);
     890        }
     891        tevent_req_set_callback(subreq, push_database_new_started, req);
     892
     893        return req;
     894}
     895
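
The spool file created above is unlink()ed as soon as it is open, the classic anonymous temp-file idiom: the name disappears immediately, and the kernel reclaims the data when the last descriptor is closed, even if the helper crashes mid-recovery. The idiom in isolation (make_spool_fd is a hypothetical name; O_EXCL added here for safety):

    #include <fcntl.h>
    #include <unistd.h>

    static int make_spool_fd(const char *path)
    {
            int fd;

            fd = open(path, O_RDWR|O_CREAT|O_EXCL, 0600);
            if (fd == -1) {
                    return -1;
            }
            unlink(path);   /* name gone; storage freed on close(fd) */
            return fd;
    }
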
     896static void push_database_new_started(struct tevent_req *subreq)
     897{
     898        struct tevent_req *req = tevent_req_callback_data(
     899                subreq, struct tevent_req);
     900        struct push_database_new_state *state = tevent_req_data(
     901                req, struct push_database_new_state);
     902        int *err_list;
     903        int ret;
     904        bool status;
     905
     906        status = ctdb_client_control_multi_recv(subreq, &ret, state,
     907                                                &err_list, NULL);
     908        TALLOC_FREE(subreq);
     909        if (! status) {
     910                int ret2;
     911                uint32_t pnn;
     912
     913                ret2 = ctdb_client_control_multi_error(state->pnn_list,
     914                                                       state->count,
     915                                                       err_list, &pnn);
     916                if (ret2 != 0) {
     917                        LOG("control DB_PUSH_START failed for db %s "
     918                            "on node %u, ret=%d\n",
     919                            recdb_name(state->recdb), pnn, ret2);
     920                } else {
     921                        LOG("control DB_PUSH_START failed for db %s, ret=%d\n",
     922                            recdb_name(state->recdb), ret);
     923                }
     924                talloc_free(err_list);
     925
     926                tevent_req_error(req, ret);
     927                return;
     928        }
     929
     930        push_database_new_send_msg(req);
     931}
     932
     933static void push_database_new_send_msg(struct tevent_req *req)
     934{
     935        struct push_database_new_state *state = tevent_req_data(
     936                req, struct push_database_new_state);
     937        struct tevent_req *subreq;
     938        struct ctdb_rec_buffer *recbuf;
     939        struct ctdb_req_message message;
     940        TDB_DATA data;
     941        int ret;
     942
     943        if (state->num_buffers_sent == state->num_buffers) {
     944                struct ctdb_req_control request;
     945
     946                ctdb_req_control_db_push_confirm(&request,
     947                                                 recdb_id(state->recdb));
     948                subreq = ctdb_client_control_multi_send(state, state->ev,
     949                                                        state->client,
     950                                                        state->pnn_list,
     951                                                        state->count,
     952                                                        TIMEOUT(), &request);
     953                if (tevent_req_nomem(subreq, req)) {
     954                        return;
     955                }
     956                tevent_req_set_callback(subreq, push_database_new_confirmed,
     957                                        req);
     958                return;
     959        }
     960
     961        ret = ctdb_rec_buffer_read(state->fd, state, &recbuf);
     962        if (ret != 0) {
     963                tevent_req_error(req, ret);
     964                return;
     965        }
     966
     967        data.dsize = ctdb_rec_buffer_len(recbuf);
     968        data.dptr = talloc_size(state, data.dsize);
     969        if (tevent_req_nomem(data.dptr, req)) {
     970                return;
     971        }
     972
     973        ctdb_rec_buffer_push(recbuf, data.dptr);
     974
     975        message.srvid = state->srvid;
     976        message.data.data = data;
     977
     978        LOG("Pushing buffer %d with %d records for %s\n",
     979            state->num_buffers_sent, recbuf->count, recdb_name(state->recdb));
     980
     981        subreq = ctdb_client_message_multi_send(state, state->ev,
     982                                                state->client,
     983                                                state->pnn_list, state->count,
     984                                                &message);
     985        if (tevent_req_nomem(subreq, req)) {
     986                return;
     987        }
     988        tevent_req_set_callback(subreq, push_database_new_send_done, req);
     989
     990        state->num_records += recbuf->count;
     991
     992        talloc_free(data.dptr);
     993        talloc_free(recbuf);
     994}
     995
     996static void push_database_new_send_done(struct tevent_req *subreq)
     997{
     998        struct tevent_req *req = tevent_req_callback_data(
     999                subreq, struct tevent_req);
     1000        struct push_database_new_state *state = tevent_req_data(
     1001                req, struct push_database_new_state);
     1002        bool status;
     1003        int ret;
     1004
     1005        status = ctdb_client_message_multi_recv(subreq, &ret, NULL, NULL);
     1006        TALLOC_FREE(subreq);
     1007        if (! status) {
     1008                LOG("Sending recovery records failed for %s\n",
     1009                    recdb_name(state->recdb));
     1010                tevent_req_error(req, ret);
     1011                return;
     1012        }
     1013
     1014        state->num_buffers_sent += 1;
     1015
     1016        push_database_new_send_msg(req);
     1017}
     1018
     1019static void push_database_new_confirmed(struct tevent_req *subreq)
     1020{
     1021        struct tevent_req *req = tevent_req_callback_data(
     1022                subreq, struct tevent_req);
     1023        struct push_database_new_state *state = tevent_req_data(
     1024                req, struct push_database_new_state);
     1025        struct ctdb_reply_control **reply;
     1026        int *err_list;
     1027        bool status;
     1028        int ret, i;
     1029        uint32_t num_records;
     1030
     1031        status = ctdb_client_control_multi_recv(subreq, &ret, state,
     1032                                                &err_list, &reply);
     1033        TALLOC_FREE(subreq);
     1034        if (! status) {
     1035                int ret2;
     1036                uint32_t pnn;
     1037
     1038                ret2 = ctdb_client_control_multi_error(state->pnn_list,
     1039                                                       state->count, err_list,
     1040                                                       &pnn);
     1041                if (ret2 != 0) {
     1042                        LOG("control DB_PUSH_CONFIRM failed for %s on node %u,"
     1043                            " ret=%d\n", recdb_name(state->recdb), pnn, ret2);
     1044                } else {
     1045                        LOG("control DB_PUSH_CONFIRM failed for %s, ret=%d\n",
     1046                            recdb_name(state->recdb), ret);
     1047                }
     1048                tevent_req_error(req, ret);
     1049                return;
     1050        }
     1051
     1052        for (i=0; i<state->count; i++) {
     1053                ret = ctdb_reply_control_db_push_confirm(reply[i],
     1054                                                         &num_records);
     1055                if (ret != 0) {
     1056                        tevent_req_error(req, EPROTO);
     1057                        return;
     1058                }
     1059
     1060                if (num_records != state->num_records) {
     1061                        LOG("Node %u received %d of %d records for %s\n",
     1062                            state->pnn_list[i], num_records,
     1063                            state->num_records, recdb_name(state->recdb));
     1064                        tevent_req_error(req, EPROTO);
     1065                        return;
     1066                }
     1067        }
     1068
     1069        talloc_free(reply);
     1070
     1071        LOG("Pushed %d records for db %s\n",
     1072            state->num_records, recdb_name(state->recdb));
     1073
     1074        tevent_req_done(req);
     1075}
     1076
     1077static bool push_database_new_recv(struct tevent_req *req, int *perr)
     1078{
     1079        return generic_recv(req, perr);
     1080}
     1081
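
For orientation, the receiving node consumes each message as one serialized ctdb_rec_buffer and tallies records for the final DB_PUSH_CONFIRM check. A rough sketch of such a handler; the srvid handler signature matches the handlers used with the message API in this file, and ctdb_rec_buffer_pull() is assumed to be the unmarshalling counterpart of ctdb_rec_buffer_push() with this signature:

    static void db_push_msg_handler(uint64_t srvid, TDB_DATA data,
                                    void *private_data)
    {
            struct ctdb_rec_buffer *recbuf;
            int ret;

            /* assumed inverse of ctdb_rec_buffer_push() */
            ret = ctdb_rec_buffer_pull(data.dptr, data.dsize, NULL, &recbuf);
            if (ret != 0) {
                    return;
            }
            /* ... apply recbuf->count records and add them to the running
             * total reported back via DB_PUSH_CONFIRM ... */
            talloc_free(recbuf);
    }
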
     1082/*
     1083 * wrapper for push_database_old and push_database_new
     1084 */
     1085
     1086struct push_database_state {
     1087        bool old_done, new_done;
     1088};
     1089
     1090static void push_database_old_done(struct tevent_req *subreq);
     1091static void push_database_new_done(struct tevent_req *subreq);
     1092
     1093static struct tevent_req *push_database_send(
     1094                        TALLOC_CTX *mem_ctx,
     1095                        struct tevent_context *ev,
     1096                        struct ctdb_client_context *client,
     1097                        uint32_t *pnn_list, int count, uint32_t *caps,
     1098                        struct ctdb_tunable_list *tun_list,
     1099                        struct recdb_context *recdb)
     1100{
     1101        struct tevent_req *req, *subreq;
     1102        struct push_database_state *state;
     1103        uint32_t *old_list, *new_list;
     1104        int old_count, new_count;
     1105        int i;
     1106
     1107        req = tevent_req_create(mem_ctx, &state, struct push_database_state);
     1108        if (req == NULL) {
     1109                return NULL;
     1110        }
     1111
     1112        state->old_done = false;
     1113        state->new_done = false;
     1114
     1115        old_count = 0;
     1116        new_count = 0;
     1117        old_list = talloc_array(state, uint32_t, count);
     1118        new_list = talloc_array(state, uint32_t, count);
     1119        if (tevent_req_nomem(old_list, req) ||
     1120            tevent_req_nomem(new_list, req)) {
     1121                return tevent_req_post(req, ev);
     1122        }
     1123
     1124        for (i=0; i<count; i++) {
     1125                uint32_t pnn = pnn_list[i];
     1126
     1127                if (caps[pnn] & CTDB_CAP_FRAGMENTED_CONTROLS) {
     1128                        new_list[new_count] = pnn;
     1129                        new_count += 1;
     1130                } else {
     1131                        old_list[old_count] = pnn;
     1132                        old_count += 1;
     1133                }
     1134        }
     1135
     1136        if (old_count > 0) {
     1137                subreq = push_database_old_send(state, ev, client,
     1138                                                old_list, old_count, recdb);
     1139                if (tevent_req_nomem(subreq, req)) {
     1140                        return tevent_req_post(req, ev);
     1141                }
     1142                tevent_req_set_callback(subreq, push_database_old_done, req);
     1143        } else {
     1144                state->old_done = true;
     1145        }
     1146
     1147        if (new_count > 0) {
     1148                subreq = push_database_new_send(state, ev, client,
     1149                                                new_list, new_count, recdb,
     1150                                                tun_list->rec_buffer_size_limit);
     1151                if (tevent_req_nomem(subreq, req)) {
     1152                        return tevent_req_post(req, ev);
     1153                }
     1154                tevent_req_set_callback(subreq, push_database_new_done, req);
     1155        } else {
     1156                state->new_done = true;
     1157        }
     1158
     1159        return req;
     1160}
     1161
     1162static void push_database_old_done(struct tevent_req *subreq)
     1163{
     1164        struct tevent_req *req = tevent_req_callback_data(
     1165                subreq, struct tevent_req);
     1166        struct push_database_state *state = tevent_req_data(
     1167                req, struct push_database_state);
     1168        bool status;
     1169        int ret;
     1170
     1171        status = push_database_old_recv(subreq, &ret);
     1172        if (! status) {
     1173                tevent_req_error(req, ret);
     1174                return;
     1175        }
     1176
     1177        state->old_done = true;
     1178
     1179        if (state->old_done && state->new_done) {
     1180                tevent_req_done(req);
     1181        }
     1182}
     1183
     1184static void push_database_new_done(struct tevent_req *subreq)
     1185{
     1186        struct tevent_req *req = tevent_req_callback_data(
     1187                subreq, struct tevent_req);
     1188        struct push_database_state *state = tevent_req_data(
     1189                req, struct push_database_state);
     1190        bool status;
     1191        int ret;
     1192
     1193        status = push_database_new_recv(subreq, &ret);
     1194        if (! status) {
     1195                tevent_req_error(req, ret);
     1196                return;
     1197        }
     1198
     1199        state->new_done = true;
     1200
     1201        if (state->old_done && state->new_done) {
     1202                tevent_req_done(req);
     1203        }
     1204}
     1205
     1206static bool push_database_recv(struct tevent_req *req, int *perr)
     1207{
     1208        return generic_recv(req, perr);
    2711209}
    2721210
     
    2801218        uint32_t *pnn_list;
    2811219        int count;
     1220        uint32_t *caps;
     1221        uint32_t *ban_credits;
    2821222        uint32_t db_id;
    2831223        struct recdb_context *recdb;
     
    2921232                        struct tevent_context *ev,
    2931233                        struct ctdb_client_context *client,
    294                         uint32_t *pnn_list, int count,
    295                         uint32_t db_id, struct recdb_context *recdb)
     1234                        uint32_t *pnn_list, int count, uint32_t *caps,
     1235                        uint32_t *ban_credits, uint32_t db_id,
     1236                        struct recdb_context *recdb)
    2961237{
    2971238        struct tevent_req *req, *subreq;
     
    3091250        state->pnn_list = pnn_list;
    3101251        state->count = count;
     1252        state->caps = caps;
     1253        state->ban_credits = ban_credits;
    3111254        state->db_id = db_id;
    3121255        state->recdb = recdb;
     
    3321275                req, struct collect_highseqnum_db_state);
    3331276        struct ctdb_reply_control **reply;
    334         struct ctdb_req_control request;
    335         struct ctdb_pulldb pulldb;
    3361277        int *err_list;
    3371278        bool status;
     
    3801321            recdb_name(state->recdb), state->max_pnn, max_seqnum);
    3811322
    382         pulldb.db_id = state->db_id;
    383         pulldb.lmaster = CTDB_LMASTER_ANY;
    384 
    385         ctdb_req_control_pull_db(&request, &pulldb);
    386         subreq = ctdb_client_control_send(state, state->ev, state->client,
    387                                           state->max_pnn, TIMEOUT(), &request);
     1323        subreq = pull_database_send(state, state->ev, state->client,
     1324                                    state->max_pnn,
     1325                                    state->caps[state->max_pnn],
     1326                                    state->recdb);
    3881327        if (tevent_req_nomem(subreq, req)) {
    3891328                return;
     
    3991338        struct collect_highseqnum_db_state *state = tevent_req_data(
    4001339                req, struct collect_highseqnum_db_state);
    401         struct ctdb_reply_control *reply;
    402         struct ctdb_rec_buffer *recbuf;
    403         int ret;
    404         bool status;
    405 
    406         status = ctdb_client_control_recv(subreq, &ret, state, &reply);
    407         TALLOC_FREE(subreq);
    408         if (! status) {
    409                 LOG("control PULL_DB failed for %s on node %u, ret=%d\n",
    410                     recdb_name(state->recdb), state->max_pnn, ret);
    411                 tevent_req_error(req, ret);
    412                 return;
    413         }
    414 
    415         ret = ctdb_reply_control_pull_db(reply, state, &recbuf);
    416         if (ret != 0) {
    417                 tevent_req_error(req, EPROTO);
    418                 return;
    419         }
    420 
    421         talloc_free(reply);
    422 
    423         ret = recdb_add(state->recdb, ctdb_client_pnn(state->client), recbuf);
    424         talloc_free(recbuf);
    425         if (! ret) {
    426                 tevent_req_error(req, EIO);
     1340        int ret;
     1341        bool status;
     1342
     1343        status = pull_database_recv(subreq, &ret);
     1344        TALLOC_FREE(subreq);
     1345        if (! status) {
     1346                state->ban_credits[state->max_pnn] += 1;
     1347                tevent_req_error(req, ret);
    4271348                return;
    4281349        }
     
    4331354static bool collect_highseqnum_db_recv(struct tevent_req *req, int *perr)
    4341355{
    435         int err;
    436 
    437         if (tevent_req_is_unix_error(req, &err)) {
    438                 if (perr != NULL) {
    439                         *perr = err;
    440                 }
    441                 return false;
    442         }
    443 
    444         return true;
     1356        return generic_recv(req, perr);
    4451357}
    4461358
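
collect_highseqnum_db now funnels through the shared pull_database helper and charges a ban credit to the chosen node when the pull fails. The "high seqnum" choice rests on the per-database sequence number that persistent databases maintain; a sketch of reading it from one node, assuming a blocking ctdb_ctrl_get_db_seqnum() wrapper in the usual sync-control style:

    uint64_t seqnum = 0;
    int ret;

    /* assumed sync wrapper for the GET_DB_SEQNUM control */
    ret = ctdb_ctrl_get_db_seqnum(mem_ctx, ev, client, pnn,
                                  TIMEOUT(), db_id, &seqnum);
    if (ret != 0) {
            LOG("control GET_DB_SEQNUM failed on node %u, ret=%d\n",
                pnn, ret);
    }
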
     
    4541366        uint32_t *pnn_list;
    4551367        int count;
     1368        uint32_t *caps;
     1369        uint32_t *ban_credits;
    4561370        uint32_t db_id;
    4571371        struct recdb_context *recdb;
     
    4661380                        struct tevent_context *ev,
    4671381                        struct ctdb_client_context *client,
    468                         uint32_t *pnn_list, int count,
    469                         uint32_t db_id, struct recdb_context *recdb)
     1382                        uint32_t *pnn_list, int count, uint32_t *caps,
     1383                        uint32_t *ban_credits, uint32_t db_id,
     1384                        struct recdb_context *recdb)
    4701385{
    4711386        struct tevent_req *req, *subreq;
    4721387        struct collect_all_db_state *state;
    473         struct ctdb_req_control request;
     1388        uint32_t pnn;
    4741389
    4751390        req = tevent_req_create(mem_ctx, &state,
     
    4831398        state->pnn_list = pnn_list;
    4841399        state->count = count;
     1400        state->caps = caps;
     1401        state->ban_credits = ban_credits;
    4851402        state->db_id = db_id;
    4861403        state->recdb = recdb;
    487 
    488         state->pulldb.db_id = db_id;
    489         state->pulldb.lmaster = CTDB_LMASTER_ANY;
    490 
    4911404        state->index = 0;
    4921405
    493         ctdb_req_control_pull_db(&request, &state->pulldb);
    494         subreq = ctdb_client_control_send(state, ev, client,
    495                                           state->pnn_list[state->index],
    496                                           TIMEOUT(), &request);
     1406        pnn = state->pnn_list[state->index];
     1407
     1408        subreq = pull_database_send(state, ev, client, pnn, caps[pnn], recdb);
    4971409        if (tevent_req_nomem(subreq, req)) {
    4981410                return tevent_req_post(req, ev);
     
    5091421        struct collect_all_db_state *state = tevent_req_data(
    5101422                req, struct collect_all_db_state);
    511         struct ctdb_reply_control *reply;
    512         struct ctdb_req_control request;
    513         struct ctdb_rec_buffer *recbuf;
    514         int ret;
    515         bool status;
    516 
    517         status = ctdb_client_control_recv(subreq, &ret, state, &reply);
    518         TALLOC_FREE(subreq);
    519         if (! status) {
    520                 LOG("control PULL_DB failed for %s from node %u, ret=%d\n",
    521                     recdb_name(state->recdb), state->pnn_list[state->index],
    522                     ret);
    523                 tevent_req_error(req, ret);
    524                 return;
    525         }
    526 
    527         ret = ctdb_reply_control_pull_db(reply, state, &recbuf);
    528         if (ret != 0) {
    529                 LOG("control PULL_DB failed for %s, ret=%d\n",
    530                     recdb_name(state->recdb), ret);
    531                 tevent_req_error(req, EPROTO);
    532                 return;
    533         }
    534 
    535         talloc_free(reply);
    536 
    537         status = recdb_add(state->recdb, ctdb_client_pnn(state->client), recbuf);
    538         talloc_free(recbuf);
    539         if (! status) {
    540                 tevent_req_error(req, EIO);
     1423        uint32_t pnn;
     1424        int ret;
     1425        bool status;
     1426
     1427        status = pull_database_recv(subreq, &ret);
     1428        TALLOC_FREE(subreq);
     1429        if (! status) {
     1430                pnn = state->pnn_list[state->index];
     1431                state->ban_credits[pnn] += 1;
     1432                tevent_req_error(req, ret);
    5411433                return;
    5421434        }
     
    5481440        }
    5491441
    550         ctdb_req_control_pull_db(&request, &state->pulldb);
    551         subreq = ctdb_client_control_send(state, state->ev, state->client,
    552                                           state->pnn_list[state->index],
    553                                           TIMEOUT(), &request);
     1442        pnn = state->pnn_list[state->index];
     1443        subreq = pull_database_send(state, state->ev, state->client,
     1444                                    pnn, state->caps[pnn], state->recdb);
    5541445        if (tevent_req_nomem(subreq, req)) {
    5551446                return;
     
    5601451static bool collect_all_db_recv(struct tevent_req *req, int *perr)
    5611452{
    562         int err;
    563 
    564         if (tevent_req_is_unix_error(req, &err)) {
    565                 if (perr != NULL) {
    566                         *perr = err;
    567                 }
    568                 return false;
    569         }
    570 
    571         return true;
     1453        return generic_recv(req, perr);
    5721454}
    5731455
     
    5921474        uint32_t *pnn_list;
    5931475        int count;
     1476        uint32_t *caps;
     1477        uint32_t *ban_credits;
    5941478        uint32_t db_id;
    5951479        bool persistent;
     
    6001484        const char *db_name, *db_path;
    6011485        struct recdb_context *recdb;
    602         struct ctdb_rec_buffer *recbuf;
    603 
    6041486};
    6051487
     
    6191501                                          struct ctdb_tunable_list *tun_list,
    6201502                                          uint32_t *pnn_list, int count,
     1503                                          uint32_t *caps,
     1504                                          uint32_t *ban_credits,
    6211505                                          uint32_t generation,
    6221506                                          uint32_t db_id, bool persistent)
     
    6361520        state->pnn_list = pnn_list;
    6371521        state->count = count;
     1522        state->caps = caps;
     1523        state->ban_credits = ban_credits;
    6381524        state->db_id = db_id;
    6391525        state->persistent = persistent;
     
    8201706                subreq = collect_highseqnum_db_send(
    8211707                                state, state->ev, state->client,
    822                                 state->pnn_list, state->count,
    823                                 state->db_id, state->recdb);
     1708                                state->pnn_list, state->count, state->caps,
     1709                                state->ban_credits, state->db_id,
     1710                                state->recdb);
    8241711        } else {
    8251712                subreq = collect_all_db_send(
    8261713                                state, state->ev, state->client,
    827                                 state->pnn_list, state->count,
    828                                 state->db_id, state->recdb);
     1714                                state->pnn_list, state->count, state->caps,
     1715                                state->ban_credits, state->db_id,
     1716                                state->recdb);
    8291717        }
    8301718        if (tevent_req_nomem(subreq, req)) {
     
    8721760        struct recover_db_state *state = tevent_req_data(
    8731761                req, struct recover_db_state);
    874         struct ctdb_req_control request;
    8751762        int *err_list;
    8761763        int ret;
     
    8981785        }
    8991786
    900         state->recbuf = recdb_records(state->recdb, state, state->destnode);
    901         if (tevent_req_nomem(state->recbuf, req)) {
    902                 return;
    903         }
    904 
    905         TALLOC_FREE(state->recdb);
    906 
    907         ctdb_req_control_push_db(&request, state->recbuf);
    908         subreq = ctdb_client_control_multi_send(state, state->ev,
    909                                                 state->client,
    910                                                 state->pnn_list, state->count,
    911                                                 TIMEOUT(), &request);
     1787        subreq = push_database_send(state, state->ev, state->client,
     1788                                    state->pnn_list, state->count,
     1789                                    state->caps, state->tun_list,
     1790                                    state->recdb);
    9121791        if (tevent_req_nomem(subreq, req)) {
    9131792                return;
     
    9231802                req, struct recover_db_state);
    9241803        struct ctdb_req_control request;
    925         int *err_list;
    926         int ret;
    927         bool status;
    928 
    929         status = ctdb_client_control_multi_recv(subreq, &ret, NULL, &err_list,
    930                                                 NULL);
    931         TALLOC_FREE(subreq);
    932         if (! status) {
    933                 int ret2;
    934                 uint32_t pnn;
    935 
    936                 ret2 = ctdb_client_control_multi_error(state->pnn_list,
    937                                                        state->count,
    938                                                        err_list, &pnn);
    939                 if (ret2 != 0) {
    940                         LOG("control PUSHDB failed for db %s on node %u,"
    941                             " ret=%d\n", state->db_name, pnn, ret2);
    942                 } else {
    943                         LOG("control PUSHDB failed for db %s, ret=%d\n",
    944                             state->db_name, ret);
    945                 }
    946                 tevent_req_error(req, ret);
    947                 return;
    948         }
    949 
    950         TALLOC_FREE(state->recbuf);
     1804        int ret;
     1805        bool status;
     1806
     1807        status = push_database_recv(subreq, &ret);
     1808        TALLOC_FREE(subreq);
     1809        if (! status) {
     1810                tevent_req_error(req, ret);
     1811                return;
     1812        }
     1813
     1814        TALLOC_FREE(state->recdb);
    9511815
    9521816        ctdb_req_control_db_transaction_commit(&request, &state->transdb);
     
    10401904static bool recover_db_recv(struct tevent_req *req)
    10411905{
    1042         int err;
    1043 
    1044         if (tevent_req_is_unix_error(req, &err)) {
    1045                 return false;
    1046         }
    1047 
    1048         return true;
     1906        return generic_recv(req, NULL);
    10491907}
    10501908
     
    10701928        uint32_t *pnn_list;
    10711929        int count;
     1930        uint32_t *caps;
     1931        uint32_t *ban_credits;
    10721932        uint32_t generation;
    10731933        uint32_t db_id;
     
    10841944                                           struct ctdb_tunable_list *tun_list,
    10851945                                           uint32_t *pnn_list, int count,
     1946                                           uint32_t *caps,
     1947                                           uint32_t *ban_credits,
    10861948                                           uint32_t generation)
    10871949{
     
    11191981                substate->pnn_list = pnn_list;
    11201982                substate->count = count;
     1983                substate->caps = caps;
     1984                substate->ban_credits = ban_credits;
    11211985                substate->generation = generation;
    11221986                substate->db_id = dbmap->dbs[i].db_id;
     
    11251989
    11261990                subreq = recover_db_send(state, ev, client, tun_list,
    1127                                          pnn_list, count, generation,
    1128                                          substate->db_id,
     1991                                         pnn_list, count, caps, ban_credits,
     1992                                         generation, substate->db_id,
    11291993                                         substate->persistent);
    11301994                if (tevent_req_nomem(subreq, req)) {
     
    11572021
    11582022        substate->num_fails += 1;
    1159         if (substate->num_fails < 5) {
     2023        if (substate->num_fails < NUM_RETRIES) {
    11602024                subreq = recover_db_send(state, state->ev, substate->client,
    11612025                                         substate->tun_list,
    11622026                                         substate->pnn_list, substate->count,
     2027                                         substate->caps, substate->ban_credits,
    11632028                                         substate->generation, substate->db_id,
    11642029                                         substate->persistent);
     
    12072072 * Run the parallel database recovery
    12082073 *
     2074 * - Get tunables
    12092075 * - Get nodemap
    12102076 * - Get vnnmap
    12112077 * - Get capabilities from all nodes
    1212  * - Get tunables from all nodes
    12132078 * - Get dbmap
    12142079 * - Set RECOVERY_ACTIVE
     
    12292094        struct ctdb_node_map *nodemap;
    12302095        uint32_t *caps;
     2096        uint32_t *ban_credits;
    12312097        struct ctdb_tunable_list *tun_list;
    12322098        struct ctdb_vnn_map *vnnmap;
     
    12342100};
    12352101
     2102static void recovery_tunables_done(struct tevent_req *subreq);
    12362103static void recovery_nodemap_done(struct tevent_req *subreq);
    12372104static void recovery_vnnmap_done(struct tevent_req *subreq);
    12382105static void recovery_capabilities_done(struct tevent_req *subreq);
    1239 static void recovery_tunables_done(struct tevent_req *subreq);
    12402106static void recovery_dbmap_done(struct tevent_req *subreq);
    12412107static void recovery_active_done(struct tevent_req *subreq);
     
    12432109static void recovery_vnnmap_update_done(struct tevent_req *subreq);
    12442110static void recovery_db_recovery_done(struct tevent_req *subreq);
     2111static void recovery_failed_done(struct tevent_req *subreq);
    12452112static void recovery_normal_done(struct tevent_req *subreq);
    12462113static void recovery_end_recovery_done(struct tevent_req *subreq);
     
    12652132        state->destnode = ctdb_client_pnn(client);
    12662133
    1267         ctdb_req_control_get_nodemap(&request);
    1268         subreq = ctdb_client_control_send(mem_ctx, ev, client, state->destnode,
    1269                                           TIMEOUT(), &request);
     2134        ctdb_req_control_get_all_tunables(&request);
     2135        subreq = ctdb_client_control_send(state, state->ev, state->client,
     2136                                          state->destnode, TIMEOUT(),
     2137                                          &request);
    12702138        if (tevent_req_nomem(subreq, req)) {
    12712139                return tevent_req_post(req, ev);
    12722140        }
    1273         tevent_req_set_callback(subreq, recovery_nodemap_done, req);
     2141        tevent_req_set_callback(subreq, recovery_tunables_done, req);
    12742142
    12752143        return req;
    12762144}
    12772145
    1278 static void recovery_nodemap_done(struct tevent_req *subreq)
     2146static void recovery_tunables_done(struct tevent_req *subreq)
    12792147{
    12802148        struct tevent_req *req = tevent_req_callback_data(
     
    12842152        struct ctdb_reply_control *reply;
    12852153        struct ctdb_req_control request;
     2154        int ret;
     2155        bool status;
     2156
     2157        status = ctdb_client_control_recv(subreq, &ret, state, &reply);
     2158        TALLOC_FREE(subreq);
     2159        if (! status) {
     2160                LOG("control GET_ALL_TUNABLES failed, ret=%d\n", ret);
     2161                tevent_req_error(req, ret);
     2162                return;
     2163        }
     2164
     2165        ret = ctdb_reply_control_get_all_tunables(reply, state,
     2166                                                  &state->tun_list);
     2167        if (ret != 0) {
     2168                LOG("control GET_ALL_TUNABLES failed, ret=%d\n", ret);
     2169                tevent_req_error(req, EPROTO);
     2170                return;
     2171        }
     2172
     2173        talloc_free(reply);
     2174
     2175        recover_timeout = state->tun_list->recover_timeout;
     2176
     2177        ctdb_req_control_get_nodemap(&request);
     2178        subreq = ctdb_client_control_send(state, state->ev, state->client,
     2179                                          state->destnode, TIMEOUT(),
     2180                                          &request);
     2181        if (tevent_req_nomem(subreq, req)) {
     2182                return;
     2183        }
     2184        tevent_req_set_callback(subreq, recovery_nodemap_done, req);
     2185}
     2186
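
Fetching the tunables first lets the helper seed recover_timeout before any other control is issued, so every TIMEOUT() below reflects the cluster's RecoverTimeout setting. TIMEOUT() itself is presumably a thin macro building an absolute deadline from that offset, along these lines:

    /* assumed shape of the TIMEOUT() macro used throughout */
    #define TIMEOUT()  tevent_timeval_current_ofs(recover_timeout, 0)
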
     2187static void recovery_nodemap_done(struct tevent_req *subreq)
     2188{
     2189        struct tevent_req *req = tevent_req_callback_data(
     2190                subreq, struct tevent_req);
     2191        struct recovery_state *state = tevent_req_data(
     2192                req, struct recovery_state);
     2193        struct ctdb_reply_control *reply;
     2194        struct ctdb_req_control request;
    12862195        bool status;
    12872196        int ret;
     
    13072216        if (state->count <= 0) {
    13082217                tevent_req_error(req, ENOMEM);
     2218                return;
     2219        }
     2220
     2221        state->ban_credits = talloc_zero_array(state, uint32_t,
     2222                                               state->nodemap->num);
     2223        if (tevent_req_nomem(state->ban_credits, req)) {
    13092224                return;
    13102225        }
     
    14082323                        return;
    14092324                }
    1410         }
    1411 
    1412         talloc_free(reply);
    1413 
    1414         ctdb_req_control_get_all_tunables(&request);
    1415         subreq = ctdb_client_control_send(state, state->ev, state->client,
    1416                                           state->destnode, TIMEOUT(),
    1417                                           &request);
    1418         if (tevent_req_nomem(subreq, req)) {
    1419                 return;
    1420         }
    1421         tevent_req_set_callback(subreq, recovery_tunables_done, req);
    1422 }
    1423 
    1424 static void recovery_tunables_done(struct tevent_req *subreq)
    1425 {
    1426         struct tevent_req *req = tevent_req_callback_data(
    1427                 subreq, struct tevent_req);
    1428         struct recovery_state *state = tevent_req_data(
    1429                 req, struct recovery_state);
    1430         struct ctdb_reply_control *reply;
    1431         struct ctdb_req_control request;
    1432         int ret;
    1433         bool status;
    1434 
    1435         status = ctdb_client_control_recv(subreq, &ret, state, &reply);
    1436         TALLOC_FREE(subreq);
    1437         if (! status) {
    1438                 LOG("control GET_ALL_TUNABLES failed, ret=%d\n", ret);
    1439                 tevent_req_error(req, ret);
    1440                 return;
    1441         }
    1442 
    1443         ret = ctdb_reply_control_get_all_tunables(reply, state,
    1444                                                   &state->tun_list);
    1445         if (ret != 0) {
    1446                 LOG("control GET_ALL_TUNABLES failed, ret=%d\n", ret);
    1447                 tevent_req_error(req, EPROTO);
    1448                 return;
    14492325        }
    14502326
     
    16752551                                  state->dbmap, state->tun_list,
    16762552                                  state->pnn_list, state->count,
     2553                                  state->caps, state->ban_credits,
    16772554                                  state->vnnmap->generation);
    16782555        if (tevent_req_nomem(subreq, req)) {
     
    16952572        TALLOC_FREE(subreq);
    16962573
    1697         LOG("%d databases recovered\n", count);
    1698 
    1699         if (! status) {
    1700                 tevent_req_error(req, EIO);
     2574        LOG("%d of %d databases recovered\n", count, state->dbmap->num);
     2575
     2576        if (! status) {
     2577                uint32_t max_pnn = CTDB_UNKNOWN_PNN, max_credits = 0;
     2578                int i;
     2579
     2580                /* Bans are not enabled */
     2581                if (state->tun_list->enable_bans == 0) {
     2582                        tevent_req_error(req, EIO);
     2583                        return;
     2584                }
     2585
     2586                for (i=0; i<state->count; i++) {
     2587                        uint32_t pnn;
     2588                        pnn = state->pnn_list[i];
     2589                        if (state->ban_credits[pnn] > max_credits) {
     2590                                max_pnn = pnn;
     2591                                max_credits = state->ban_credits[pnn];
     2592                        }
     2593                }
     2594
      2595                /* If pulling a database failed multiple times, ban the worst node */
     2596                if (max_credits >= NUM_RETRIES) {
     2597                        struct ctdb_req_message message;
     2598
     2599                        LOG("Assigning banning credits to node %u\n", max_pnn);
     2600
     2601                        message.srvid = CTDB_SRVID_BANNING;
     2602                        message.data.pnn = max_pnn;
     2603
     2604                        subreq = ctdb_client_message_send(
     2605                                        state, state->ev, state->client,
     2606                                        ctdb_client_pnn(state->client),
     2607                                        &message);
     2608                        if (tevent_req_nomem(subreq, req)) {
     2609                                return;
     2610                        }
     2611                        tevent_req_set_callback(subreq, recovery_failed_done,
     2612                                                req);
     2613                } else {
     2614                        tevent_req_error(req, EIO);
     2615                }
    17012616                return;
    17022617        }
     
    17112626        }
    17122627        tevent_req_set_callback(subreq, recovery_normal_done, req);
     2628}
     2629
     2630static void recovery_failed_done(struct tevent_req *subreq)
     2631{
     2632        struct tevent_req *req = tevent_req_callback_data(
     2633                subreq, struct tevent_req);
     2634        int ret;
     2635        bool status;
     2636
     2637        status = ctdb_client_message_recv(subreq, &ret);
     2638        TALLOC_FREE(subreq);
     2639        if (! status) {
     2640                LOG("failed to assign banning credits, ret=%d\n", ret);
     2641        }
     2642
     2643        tevent_req_error(req, EIO);
    17132644}
    17142645
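
The banning path is best-effort by design: the message goes to the local node and the recovery still fails with EIO whether or not it is delivered. The same send could be written synchronously, assuming a blocking ctdb_client_message() wrapper that pairs with the async message send/recv calls used above:

    struct ctdb_req_message message;
    int ret;

    message.srvid = CTDB_SRVID_BANNING;
    message.data.pnn = max_pnn;

    /* assumed blocking counterpart of ctdb_client_message_send() */
    ret = ctdb_client_message(mem_ctx, ev, client,
                              ctdb_client_pnn(client), &message);
    if (ret != 0) {
            LOG("failed to assign banning credits, ret=%d\n", ret);
    }
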
     
    17952726static void recovery_recv(struct tevent_req *req, int *perr)
    17962727{
    1797         int err;
    1798 
    1799         if (tevent_req_is_unix_error(req, &err)) {
    1800                 if (perr != NULL) {
    1801                         *perr = err;
    1802                 }
    1803                 return;
    1804         }
     2728        generic_recv(req, perr);
    18052729}
    18062730
  • vendor/current/ctdb/server/ctdb_takeover.c

    r988 r989  
    404404}
    405405
    406 struct takeover_callback_state {
    407         struct ctdb_req_control_old *c;
    408         ctdb_sock_addr *addr;
    409         struct ctdb_vnn *vnn;
    410 };
    411 
    412406struct ctdb_do_takeip_state {
    413407        struct ctdb_req_control_old *c;
     
    502496        CTDB_NO_MEMORY(ctdb, state);
    503497
    504         state->c = talloc_steal(ctdb, c);
     498        state->c = NULL;
    505499        state->vnn   = vnn;
    506500
     
    531525        }
    532526
     527        state->c = talloc_steal(ctdb, c);
    533528        return 0;
    534529}
     
    639634        CTDB_NO_MEMORY(ctdb, state);
    640635
    641         state->c = talloc_steal(ctdb, c);
     636        state->c = NULL;
    642637        state->old = old;
    643638        state->vnn = vnn;
     
    671666        }
    672667
     668        state->c = talloc_steal(ctdb, c);
    673669        return 0;
    674670}
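
Both the takeip and releaseip paths now delay taking ownership of the incoming control until the asynchronous event has actually been launched: state->c starts out NULL, so the destructor and the early error paths leave replying (and the control's lifetime) to the caller. The ownership pattern in isolation, with start_async_op as a hypothetical stand-in for the event launch:

    state->c = NULL;                    /* not ours yet */

    ret = start_async_op(ctdb, state);  /* hypothetical launcher */
    if (ret != 0) {
            talloc_free(state);
            return -1;                  /* caller still owns c and replies */
    }

    state->c = talloc_steal(ctdb, c);   /* reply happens in our callback */
    return 0;
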
     
    816812}
    817813
    818 /*
    819   kill any clients that are registered with a IP that is being released
    820  */
    821 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
    822 {
    823         struct ctdb_client_ip *ip;
    824 
    825         DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
    826                 ctdb_addr_to_str(addr)));
    827 
    828         for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
    829                 ctdb_sock_addr tmp_addr;
    830 
    831                 tmp_addr = ip->addr;
    832                 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
    833                         ip->client_id,
    834                         ctdb_addr_to_str(&ip->addr)));
    835 
    836                 if (ctdb_same_ip(&tmp_addr, addr)) {
    837                         struct ctdb_client *client = reqid_find(ctdb->idr,
    838                                                                 ip->client_id,
    839                                                                 struct ctdb_client);
    840                         DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
    841                                 ip->client_id,
    842                                 ctdb_addr_to_str(&ip->addr),
    843                                 client->pid));
    844 
    845                         if (client->pid != 0) {
    846                                 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
    847                                         (unsigned)client->pid,
    848                                         ctdb_addr_to_str(addr),
    849                                         ip->client_id));
    850                                 kill(client->pid, SIGKILL);
    851                         }
    852                 }
    853         }
    854 }
    855 
    856814static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
    857815{
     
    862820}
    863821
     822static struct ctdb_vnn *release_ip_post(struct ctdb_context *ctdb,
     823                                        struct ctdb_vnn *vnn,
     824                                        ctdb_sock_addr *addr)
     825{
     826        TDB_DATA data;
     827
     828        /* Send a message to all clients of this node telling them
     829         * that the cluster has been reconfigured and they should
     830         * close any connections on this IP address
     831         */
     832        data.dptr = (uint8_t *)ctdb_addr_to_str(addr);
     833        data.dsize = strlen((char *)data.dptr)+1;
     834        DEBUG(DEBUG_INFO, ("Sending RELEASE_IP message for %s\n", data.dptr));
     835        ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
     836
     837        ctdb_vnn_unassign_iface(ctdb, vnn);
     838
     839        /* Process the IP if it has been marked for deletion */
     840        if (vnn->delete_pending) {
     841                do_delete_ip(ctdb, vnn);
     842                return NULL;
     843        }
     844
     845        return vnn;
     846}
     847
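
release_ip_post() replaces the removed release_kill_clients() behaviour: instead of SIGKILLing clients registered on the address, the daemon now only broadcasts the released IP (as a NUL-terminated string) on CTDB_SRVID_RELEASE_IP and lets clients clean up after themselves. A client-side reaction could look like this sketch, assuming the usual ctdb srvid handler signature:

    static void release_ip_handler(uint64_t srvid, TDB_DATA data,
                                   void *private_data)
    {
            const char *ip = (const char *)data.dptr;  /* NUL-terminated */

            /* close or re-bind any sockets using this address ... */
    }
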
     848struct release_ip_callback_state {
     849        struct ctdb_req_control_old *c;
     850        ctdb_sock_addr *addr;
     851        struct ctdb_vnn *vnn;
     852        uint32_t target_pnn;
     853};
     854
    864855/*
    865856  called when releaseip event finishes
    866857 */
    867 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
     858static void release_ip_callback(struct ctdb_context *ctdb, int status,
    868859                                void *private_data)
    869860{
    870         struct takeover_callback_state *state =
    871                 talloc_get_type(private_data, struct takeover_callback_state);
    872         TDB_DATA data;
     861        struct release_ip_callback_state *state =
     862                talloc_get_type(private_data, struct release_ip_callback_state);
    873863
    874864        if (status == -ETIME) {
     
    888878        }
    889879
    890         /* send a message to all clients of this node telling them
    891            that the cluster has been reconfigured and they should
    892            release any sockets on this IP */
    893         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
    894         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
    895         data.dsize = strlen((char *)data.dptr)+1;
    896 
    897         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
    898 
    899         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
    900 
    901         /* kill clients that have registered with this IP */
    902         release_kill_clients(ctdb, state->addr);
    903 
    904         ctdb_vnn_unassign_iface(ctdb, state->vnn);
    905 
    906         /* Process the IP if it has been marked for deletion */
    907         if (state->vnn->delete_pending) {
    908                 do_delete_ip(ctdb, state->vnn);
    909                 state->vnn = NULL;
    910         }
     880        state->vnn->pnn = state->target_pnn;
     881        state->vnn = release_ip_post(ctdb, state->vnn, state->addr);
    911882
    912883        /* the control succeeded */
     
    915886}
    916887
    917 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
     888static int ctdb_releaseip_destructor(struct release_ip_callback_state *state)
    918889{
    919890        if (state->vnn != NULL) {
     
    932903{
    933904        int ret;
    934         struct takeover_callback_state *state;
     905        struct release_ip_callback_state *state;
    935906        struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
    936907        struct ctdb_vnn *vnn;
     
    944915                return 0;
    945916        }
    946         vnn->pnn = pip->pnn;
    947917
    948918        /* stop any previous arps */
     
    950920        vnn->takeover_ctx = NULL;
    951921
    952         /* Some ctdb tool commands (e.g. moveip, rebalanceip) send
    953          * lazy multicast to drop an IP from any node that isn't the
     954          * intended new node.  The following makes ctdbd ignore
    955          * a release for any address it doesn't host.
     922        /* RELEASE_IP controls are sent to all nodes that should not
      923         * be hosting a particular IP.  This serves two purposes.  The
      924         * first is to help resolve any inconsistencies.  If a node
      925         * does unexpectedly host an IP then it will be released.  The
      926         * second is to use a "redundant release" to tell non-takeover
     927         * nodes where an IP is moving to.  This is how "ctdb ip" can
     928         * report the (likely) location of an IP by only asking the
     929         * local node.  Redundant releases need to update the PNN but
     930         * are otherwise ignored.
    956931         */
    957932        if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
     
    961936                                vnn->public_netmask_bits,
    962937                                ctdb_vnn_iface_string(vnn)));
     938                        vnn->pnn = pip->pnn;
    963939                        ctdb_vnn_unassign_iface(ctdb, vnn);
    964940                        return 0;
     
    969945                                           ctdb_addr_to_str(&pip->addr),
    970946                                           vnn->public_netmask_bits));
     947                        vnn->pnn = pip->pnn;
    971948                        return 0;
    972949                }
     
    994971                pip->pnn));
    995972
    996         state = talloc(ctdb, struct takeover_callback_state);
     973        state = talloc(ctdb, struct release_ip_callback_state);
    997974        if (state == NULL) {
    998975                ctdb_set_error(ctdb, "Out of memory at %s:%d",
     
    1002979        }
    1003980
    1004         state->c = talloc_steal(state, c);
    1005         state->addr = talloc(state, ctdb_sock_addr);       
     981        state->c = NULL;
     982        state->addr = talloc(state, ctdb_sock_addr);
    1006983        if (state->addr == NULL) {
    1007984                ctdb_set_error(ctdb, "Out of memory at %s:%d",
     
    1012989        }
    1013990        *state->addr = pip->addr;
     991        state->target_pnn = pip->pnn;
    1014992        state->vnn   = vnn;
    1015993
     
    10351013        /* tell the control that we will be replying asynchronously */
    10361014        *async_reply = true;
     1015        state->c = talloc_steal(state, c);
    10371016        return 0;
    10381017}
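Initialising state->c to NULL and deferring talloc_steal() until after *async_reply is set is a take-ownership-last pattern: if any earlier setup step fails, the caller still owns the request and sends the error reply itself. The same reshuffle appears again in eventscript.c below. A compilable sketch of the idea, with hypothetical request/state types standing in for the ctdb ones:

    #include <stdbool.h>
    #include <talloc.h>

    struct request { int id; };                  /* hypothetical */
    struct async_state { struct request *c; };   /* hypothetical */

    static int setup_async(TALLOC_CTX *mem_ctx, struct request *c,
                           bool *async_reply)
    {
            struct async_state *state = talloc_zero(mem_ctx,
                                                    struct async_state);

            if (state == NULL) {
                    /* Caller still owns c and replies with an error. */
                    return -1;
            }

            /* ... further fallible setup steps go here ... */

            /* Nothing can fail from here on: commit to an async reply
             * and only now take ownership of the request. */
            *async_reply = true;
            state->c = talloc_steal(state, c);
            return 0;
    }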
     
    17761755        bool can_host_ips;
    17771756
     1757        /* Default timeout for early jump to IPREALLOCATED.  See below
     1758         * for explanation of 3 times... */
     1759        timeout = timeval_current_ofs(3 * ctdb->tunable.takeover_timeout, 0);
     1760
    17781761        /*
    17791762         * ip failover is completely disabled, just send out the
     
    18521835
    18531836        ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
     1837
     1838        /* Each of the following stages (RELEASE_IP, TAKEOVER_IP,
     1839         * IPREALLOCATED) notionally has a timeout of TakeoverTimeout
     1840         * seconds.  However, RELEASE_IP can take longer due to TCP
      1841         * connection killing, so it sometimes needs more time.
     1842         * Therefore, use a cumulative timeout of TakeoverTimeout * 3
     1843         * seconds across all 3 stages.  No explicit expiry checks are
     1844         * needed before each stage because tevent is smart enough to
     1845         * fire the timeouts even if they are in the past.  Initialise
     1846         * this here so it explicitly covers the stages we're
     1847         * interested in but, in particular, not the time taken by the
     1848         * ipalloc().
     1849         */
     1850        timeout = timeval_current_ofs(3 * ctdb->tunable.takeover_timeout, 0);
    18541851
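The comment above replaces the per-stage TAKEOVER_TIMEOUT() calls (removed further down) with one absolute deadline computed up front, so a slow RELEASE_IP stage borrows time from the later stages instead of timing out independently. A standalone sketch of the cumulative-deadline idea, with timeval_current_ofs() re-implemented locally and the stage names reduced to labels:

    #include <stdio.h>
    #include <sys/time.h>

    /* Local stand-in for Samba's timeval_current_ofs(). */
    static struct timeval deadline_in(unsigned int secs)
    {
            struct timeval tv;

            gettimeofday(&tv, NULL);
            tv.tv_sec += secs;
            return tv;
    }

    int main(void)
    {
            unsigned int takeover_timeout = 9;  /* the TakeoverTimeout tunable */
            /* One cumulative deadline shared by all three stages, instead
             * of restarting a fresh timeout before each one. */
            struct timeval deadline = deadline_in(3 * takeover_timeout);
            const char *stages[] = { "RELEASE_IP", "TAKEOVER_IP",
                                     "IPREALLOCATED" };
            int i;

            for (i = 0; i < 3; i++) {
                    printf("%s must finish by %ld\n", stages[i],
                           (long)deadline.tv_sec);
            }
            return 0;
    }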
    18551852        /* Send a RELEASE_IP to all nodes that should not be hosting
     
    18751872                        ip.addr = tmp_ip->addr;
    18761873
    1877                         timeout = TAKEOVER_TIMEOUT();
    18781874                        data.dsize = sizeof(ip);
    18791875                        data.dptr  = (uint8_t *)&ip;
     
    19181914                ip.addr = tmp_ip->addr;
    19191915
    1920                 timeout = TAKEOVER_TIMEOUT();
    19211916                data.dsize = sizeof(ip);
    19221917                data.dptr  = (uint8_t *)&ip;
     
    19561951        nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
    19571952        ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
    1958                                         nodes, 0, TAKEOVER_TIMEOUT(),
     1953                                        nodes, 0, timeout,
    19591954                                        false, tdb_null,
    19601955                                        NULL, iprealloc_fail_callback,
     
    23772372void ctdb_release_all_ips(struct ctdb_context *ctdb)
    23782373{
    2379         struct ctdb_vnn *vnn;
     2374        struct ctdb_vnn *vnn, *next;
    23802375        int count = 0;
    23812376
     
    23842379        }
    23852380
    2386         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
     2381        for (vnn = ctdb->vnn; vnn != NULL; vnn = next) {
     2382                /* vnn can be freed below in release_ip_post() */
     2383                next = vnn->next;
     2384
    23872385                if (!ctdb_sys_have_ip(&vnn->public_address)) {
    23882386                        ctdb_vnn_unassign_iface(ctdb, vnn);
    2389                         continue;
    2390                 }
    2391                 if (!vnn->iface) {
    23922387                        continue;
    23932388                }
     
    24132408
    24142409                ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
    2415                                   ctdb_vnn_iface_string(vnn),
    2416                                   ctdb_addr_to_str(&vnn->public_address),
    2417                                   vnn->public_netmask_bits);
    2418                 release_kill_clients(ctdb, &vnn->public_address);
    2419                 ctdb_vnn_unassign_iface(ctdb, vnn);
    2420                 vnn->update_in_flight = false;
     2410                                       ctdb_vnn_iface_string(vnn),
     2411                                       ctdb_addr_to_str(&vnn->public_address),
     2412                                       vnn->public_netmask_bits);
     2413                /* releaseip timeouts are converted to success, so to
     2414                 * detect failures just check if the IP address is
     2415                 * still there...
     2416                 */
     2417                if (ctdb_sys_have_ip(&vnn->public_address)) {
     2418                        DEBUG(DEBUG_ERR,
     2419                              (__location__
     2420                               " IP address %s not released\n",
     2421                               ctdb_addr_to_str(&vnn->public_address)));
     2422                        vnn->update_in_flight = false;
     2423                        continue;
     2424                }
     2425
     2426                vnn = release_ip_post(ctdb, vnn, &vnn->public_address);
     2427                if (vnn != NULL) {
     2428                        vnn->update_in_flight = false;
     2429                }
    24212430                count++;
    24222431        }
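The rewritten loop captures vnn->next before the body runs because release_ip_post() may free the current element, after which vnn->next is no longer readable. The same save-next-first pattern in its generic form (list type is hypothetical):

    #include <stdlib.h>

    struct node {                    /* hypothetical list element */
            struct node *next;
    };

    static void free_list(struct node *head)
    {
            struct node *n, *next;

            for (n = head; n != NULL; n = next) {
                    next = n->next;  /* saved first: free() invalidates n */
                    free(n);
            }
    }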
  • vendor/current/ctdb/server/ctdb_tunables.c

    r988 r989  
    4242        { "KeepaliveInterval",    5,  offsetof(struct ctdb_tunable_list, keepalive_interval), false },
    4343        { "KeepaliveLimit",       5,  offsetof(struct ctdb_tunable_list, keepalive_limit), false },
    44         { "RecoverTimeout",     120,  offsetof(struct ctdb_tunable_list, recover_timeout), false },
     44        { "RecoverTimeout",      30,  offsetof(struct ctdb_tunable_list, recover_timeout), false },
    4545        { "RecoverInterval",      1,  offsetof(struct ctdb_tunable_list, recover_interval), false },
    4646        { "ElectionTimeout",      3,  offsetof(struct ctdb_tunable_list, election_timeout), false },
     
    9494        { "TDBMutexEnabled", 0, offsetof(struct ctdb_tunable_list, mutex_enabled), false },
    9595        { "LockProcessesPerDB", 200, offsetof(struct ctdb_tunable_list, lock_processes_per_db), false },
     96        { "RecBufferSizeLimit", 1000000, offsetof(struct ctdb_tunable_list, rec_buffer_size_limit), false },
    9697};
    9798
  • vendor/current/ctdb/server/ctdbd.c

    r988 r989  
    179179
    180180        ev = tevent_context_init(NULL);
     181        if (ev == NULL) {
     182                DEBUG(DEBUG_ALERT,("tevent_context_init() failed\n"));
     183                exit(1);
     184        }
    181185        tevent_loop_allow_nesting(ev);
    182186
  • vendor/current/ctdb/server/eventscript.c

    r988 r989  
    697697{
    698698        DLIST_REMOVE(callback->ctdb->script_callbacks, callback);
     699        return 0;
     700}
     701
     702struct schedule_callback_state {
     703        struct ctdb_context *ctdb;
     704        void (*callback)(struct ctdb_context *, int, void *);
     705        void *private_data;
     706        int status;
     707        struct tevent_immediate *im;
     708};
     709
     710static void schedule_callback_handler(struct tevent_context *ctx,
     711                                      struct tevent_immediate *im,
     712                                      void *private_data)
     713{
     714        struct schedule_callback_state *state =
     715                talloc_get_type_abort(private_data,
     716                                      struct schedule_callback_state);
     717
     718        if (state->callback != NULL) {
     719                state->callback(state->ctdb, state->status,
     720                                state->private_data);
     721        }
     722        talloc_free(state);
     723}
     724
     725static int
     726schedule_callback_immediate(struct ctdb_context *ctdb,
     727                            void (*callback)(struct ctdb_context *,
     728                                             int, void *),
     729                            void *private_data,
     730                            int status)
     731{
     732        struct schedule_callback_state *state;
     733        struct tevent_immediate *im;
     734
     735        state = talloc_zero(ctdb, struct schedule_callback_state);
     736        if (state == NULL) {
     737                DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
     738                return -1;
     739        }
     740        im = tevent_create_immediate(state);
     741        if (im == NULL) {
     742                DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
     743                talloc_free(state);
     744                return -1;
     745        }
     746
     747        state->ctdb = ctdb;
     748        state->callback = callback;
     749        state->private_data = private_data;
     750        state->status = status;
     751        state->im = im;
     752
     753        tevent_schedule_immediate(im, ctdb->ev,
     754                                  schedule_callback_handler, state);
    699755        return 0;
    700756}
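schedule_callback_immediate() defers the completion callback through a tevent immediate, so a caller never sees its callback fire before the scheduling call has returned; the handler then runs from the event loop and frees the state. A self-contained demonstration of that tevent pattern (the state struct and handler are illustrative, not the ctdb ones):

    #include <stdio.h>
    #include <talloc.h>
    #include <tevent.h>

    struct demo_state {             /* illustrative only */
            int status;
    };

    static void demo_handler(struct tevent_context *ev,
                             struct tevent_immediate *im,
                             void *private_data)
    {
            struct demo_state *state = talloc_get_type_abort(
                    private_data, struct demo_state);

            printf("deferred callback fired, status=%d\n", state->status);
            talloc_free(state);     /* also frees im, its talloc child */
    }

    int main(void)
    {
            struct tevent_context *ev = tevent_context_init(NULL);
            struct demo_state *state = talloc_zero(ev, struct demo_state);
            struct tevent_immediate *im = tevent_create_immediate(state);

            state->status = 0;
            /* Nothing runs here: the handler fires from the event loop,
             * never from inside tevent_schedule_immediate() itself. */
            tevent_schedule_immediate(im, ev, demo_handler, state);
            tevent_loop_once(ev);
            talloc_free(ev);
            return 0;
    }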
     
    808864        state->child = 0;
    809865
     866        /* Nothing to do? */
     867        if (state->scripts->num_scripts == 0) {
     868                int ret = schedule_callback_immediate(ctdb, callback,
     869                                                      private_data, 0);
     870                talloc_free(state);
     871                if (ret != 0) {
     872                        DEBUG(DEBUG_ERR,
     873                              ("Unable to schedule callback for 0 scripts\n"));
     874                        return 1;
     875                }
     876                return 0;
     877        }
     878
     879        state->scripts->scripts[0].status = fork_child_for_script(ctdb, state);
     880        if (state->scripts->scripts[0].status != 0) {
     881                talloc_free(state);
     882                return -1;
     883        }
     884
    810885        if (call == CTDB_EVENT_MONITOR) {
    811886                ctdb->current_monitor = state;
    812887        }
    813888
     889        ctdb->active_events++;
     890
    814891        talloc_set_destructor(state, event_script_destructor);
    815 
    816         ctdb->active_events++;
    817 
    818         /* Nothing to do? */
    819         if (state->scripts->num_scripts == 0) {
    820                 callback(ctdb, 0, private_data);
    821                 talloc_free(state);
    822                 return 0;
    823         }
    824 
    825         state->scripts->scripts[0].status = fork_child_for_script(ctdb, state);
    826         if (state->scripts->scripts[0].status != 0) {
    827                 /* Callback is called from destructor, with fail result. */
    828                 talloc_free(state);
    829                 return 0;
    830         }
    831892
    832893        if (!timeval_is_zero(&state->timeout)) {
     
    10081069        CTDB_NO_MEMORY(ctdb, state);
    10091070
    1010         state->c = talloc_steal(state, c);
     1071        state->c = NULL;
    10111072
    10121073        DEBUG(DEBUG_NOTICE,("Running eventscripts with arguments %s\n", indata.dptr));
     
    10241085        /* tell ctdb_control.c that we will be replying asynchronously */
    10251086        *async_reply = true;
    1026 
     1087        state->c = talloc_steal(state, c);
    10271088        return 0;
    10281089}
  • vendor/current/ctdb/tests/src/ctdbd_test.c

    r988 r989  
    4848#include "common/reqid.c"
    4949#include "common/logging.c"
     50#include "common/pidfile.c"
    5051
    5152/* CTDB_SERVER_OBJ */
  • vendor/current/ctdb/tests/src/protocol_client_test.c

    r988 r989  
    657657                cd->data.db_id = rand32();
    658658                break;
     659
     660        case CTDB_CONTROL_DB_PULL:
     661                cd->data.pulldb_ext = talloc(mem_ctx, struct ctdb_pulldb_ext);
     662                assert(cd->data.pulldb_ext != NULL);
     663                fill_ctdb_pulldb_ext(mem_ctx, cd->data.pulldb_ext);
     664                break;
     665
     666        case CTDB_CONTROL_DB_PUSH_START:
     667                cd->data.pulldb_ext = talloc(mem_ctx, struct ctdb_pulldb_ext);
     668                assert(cd->data.pulldb_ext != NULL);
     669                fill_ctdb_pulldb_ext(mem_ctx, cd->data.pulldb_ext);
     670                break;
     671
     672        case CTDB_CONTROL_DB_PUSH_CONFIRM:
     673                cd->data.db_id = rand32();
     674                break;
     675
    659676        }
    660677}
     
    11041121                assert(cd->data.db_id == cd2->data.db_id);
    11051122                break;
     1123
     1124        case CTDB_CONTROL_DB_PULL:
     1125                verify_ctdb_pulldb_ext(cd->data.pulldb_ext,
     1126                                       cd2->data.pulldb_ext);
     1127                break;
     1128
     1129        case CTDB_CONTROL_DB_PUSH_START:
     1130                verify_ctdb_pulldb_ext(cd->data.pulldb_ext,
     1131                                       cd2->data.pulldb_ext);
     1132                break;
     1133
     1134        case CTDB_CONTROL_DB_PUSH_CONFIRM:
     1135                assert(cd->data.db_id == cd2->data.db_id);
     1136                break;
     1137
    11061138        }
    11071139}
     
    15601592                break;
    15611593
     1594        case CTDB_CONTROL_DB_PULL:
     1595                cd->data.num_records = rand32();
     1596                break;
     1597
     1598        case CTDB_CONTROL_DB_PUSH_CONFIRM:
     1599                cd->data.num_records = rand32();
     1600                break;
     1601
    15621602        }
    15631603}
     
    19411981        case CTDB_CONTROL_GET_NODES_FILE:
    19421982                verify_ctdb_node_map(cd->data.nodemap, cd2->data.nodemap);
     1983                break;
     1984
     1985        case CTDB_CONTROL_DB_PULL:
     1986                assert(cd->data.num_records == cd2->data.num_records);
     1987                break;
     1988
     1989        case CTDB_CONTROL_DB_PUSH_CONFIRM:
     1990                assert(cd->data.num_records == cd2->data.num_records);
    19431991                break;
    19441992
  • vendor/current/ctdb/tests/src/protocol_types_test.c

    r988 r989  
    2020#include "replace.h"
    2121#include "system/network.h"
     22#include "system/filesys.h"
    2223
    2324#include <assert.h>
     
    181182        assert(p1->db_id == p2->db_id);
    182183        assert(p1->lmaster == p2->lmaster);
     184}
     185
     186static void fill_ctdb_pulldb_ext(TALLOC_CTX *mem_ctx,
     187                                 struct ctdb_pulldb_ext *p)
     188{
     189        p->db_id = rand32();
     190        p->lmaster = rand32();
     191        p->srvid = rand64();
     192}
     193
     194static void verify_ctdb_pulldb_ext(struct ctdb_pulldb_ext *p1,
     195                                   struct ctdb_pulldb_ext *p2)
     196{
     197        assert(p1->db_id == p2->db_id);
     198        assert(p1->lmaster == p2->lmaster);
     199        assert(p1->srvid == p2->srvid);
    183200}
    184201
     
    11781195DEFINE_TEST(struct ctdb_dbid_map, ctdb_dbid_map);
    11791196DEFINE_TEST(struct ctdb_pulldb, ctdb_pulldb);
     1197DEFINE_TEST(struct ctdb_pulldb_ext, ctdb_pulldb_ext);
    11801198DEFINE_TEST(struct ctdb_rec_data, ctdb_rec_data);
    11811199DEFINE_TEST(struct ctdb_rec_buffer, ctdb_rec_buffer);
     
    12191237DEFINE_TEST(struct ctdb_g_lock_list, ctdb_g_lock_list);
    12201238
     1239static void test_ctdb_rec_buffer_read_write(void)
     1240{
     1241        TALLOC_CTX *mem_ctx = talloc_new(NULL);
     1242        struct ctdb_rec_buffer *p1, **p2;
     1243        const char *filename = "ctdb_rec_buffer_test.dat";
     1244        int count = 100;
     1245        int fd, i, ret;
     1246        off_t offset;
     1247
     1248        p1 = talloc_array(mem_ctx, struct ctdb_rec_buffer, count);
     1249        assert(p1 != NULL);
     1250        for (i=0; i<count; i++) {
     1251                fill_ctdb_rec_buffer(mem_ctx, &p1[i]);
     1252        }
     1253
     1254        fd = open(filename, O_RDWR|O_CREAT, 0600);
     1255        assert(fd != -1);
     1256        unlink(filename);
     1257
     1258        for (i=0; i<count; i++) {
     1259                ret = ctdb_rec_buffer_write(&p1[i], fd);
     1260                assert(ret == 0);
     1261        }
     1262
     1263        offset = lseek(fd, 0, SEEK_CUR);
     1264        assert(offset != -1);
     1265        offset = lseek(fd, -offset, SEEK_CUR);
     1266        assert(offset == 0);
     1267
     1268        p2 = talloc_array(mem_ctx, struct ctdb_rec_buffer *, count);
     1269        assert(p2 != NULL);
     1270
     1271        for (i=0; i<count; i++) {
     1272                ret = ctdb_rec_buffer_read(fd, mem_ctx, &p2[i]);
     1273                assert(ret == 0);
     1274        }
     1275
     1276        close(fd);
     1277
     1278        for (i=0; i<count; i++) {
     1279                verify_ctdb_rec_buffer(&p1[i], p2[i]);
     1280        }
     1281
     1282        talloc_free(mem_ctx);
     1283}
     1284
    12211285int main(int argc, char *argv[])
    12221286{
     
    12411305        TEST_FUNC(ctdb_dbid_map)();
    12421306        TEST_FUNC(ctdb_pulldb)();
     1307        TEST_FUNC(ctdb_pulldb_ext)();
    12431308        TEST_FUNC(ctdb_rec_data)();
    12441309        TEST_FUNC(ctdb_rec_buffer)();
     
    12821347        TEST_FUNC(ctdb_g_lock_list)();
    12831348
     1349        test_ctdb_rec_buffer_read_write();
     1350
    12841351        return 0;
    12851352}
  • vendor/current/ctdb/utils/ping_pong/ping_pong.c

    r988 r989  
    143143        if (val == NULL) {
    144144                printf("calloc failed\n");
    145                 munmap(p, num_locks+1);
     145                if (use_mmap) {
     146                        munmap(p, num_locks+1);
     147                }
    146148                return;
    147149        }
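The guard matters because munmap() is only valid for memory obtained from mmap(): on a pointer from calloc() it fails (EINVAL for an unaligned address) and, at worst, a page-aligned stray address could unmap unrelated memory. The guarded-cleanup shape in isolation (the helper is illustrative):

    #include <stdlib.h>
    #include <sys/mman.h>

    /* Illustrative helper: release a buffer according to how it was
     * allocated.  The use_mmap flag must be checked, exactly as in the
     * fix above, before deciding which deallocator to call. */
    static void release_buffer(void *p, size_t len, int use_mmap)
    {
            if (use_mmap) {
                    munmap(p, len);
            } else {
                    free(p);
            }
    }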
  • vendor/current/ctdb/wscript

    r988 r989  
    209209    conf.env.CTDB_TEST_DATADIR = os.path.join(conf.env.EXEC_PREFIX,
    210210                                              'share/ctdb-tests')
    211     conf.env.CTDB_TEST_LIBDIR = os.path.join(conf.env.LIBDIR, 'ctdb-tests')
     211    conf.env.CTDB_TEST_LIBEXECDIR = os.path.join(conf.env.LIBEXECDIR, 'ctdb/tests')
    212212
    213213    # Allow unified compilation and separate compilation of utilities
     
    344344                                          '''db_hash.c srvid.c reqid.c
    345345                                             pkt_read.c pkt_write.c comm.c
    346                                              logging.c'''),
    347                         deps='replace talloc tevent tdb tevent-unix-util')
     346                                             logging.c pidfile.c'''),
     347                        deps='replace talloc tevent tdb tevent-util')
    348348
    349349    bld.SAMBA_SUBSYSTEM('ctdb-protocol',
     
    630630        'protocol_types_test',
    631631        'protocol_client_test',
     632        'pidfile_test',
    632633    ]
    633634
     
    637638        bld.SAMBA_BINARY(target,
    638639                         source=src,
    639                          deps='talloc tevent tdb tevent-unix-util',
    640                          install_path='${CTDB_TEST_LIBDIR}')
     640                         deps='talloc tevent tdb tevent-util',
     641                         install_path='${CTDB_TEST_LIBEXECDIR}')
    641642
    642643    bld.SAMBA_BINARY('reqid_test',
    643644                     source='tests/src/reqid_test.c',
    644645                     deps='samba-util',
    645                      install_path='${CTDB_TEST_LIBDIR}')
     646                     install_path='${CTDB_TEST_LIBEXECDIR}')
    646647
    647648    # Test binaries
     
    672673                         includes='include',
    673674                         deps='ctdb-client ctdb-common ctdb-util',
    674                          install_path='${CTDB_TEST_LIBDIR}')
     675                         install_path='${CTDB_TEST_LIBEXECDIR}')
    675676
    676677    bld.SAMBA_BINARY('ctdb_takeover_tests',
     
    681682                          ib_deps,
    682683                     includes='include',
    683                      install_path='${CTDB_TEST_LIBDIR}')
     684                     install_path='${CTDB_TEST_LIBEXECDIR}')
    684685
    685686    bld.SAMBA_BINARY('ctdb_functest',
     
    688689                             samba-util tdb-wrap''',
    689690                     includes='include',
    690                      install_path='${CTDB_TEST_LIBDIR}')
     691                     install_path='${CTDB_TEST_LIBEXECDIR}')
    691692
    692693    bld.SAMBA_BINARY('ctdb_stubtest',
     
    695696                             samba-util tdb-wrap''',
    696697                     includes='include',
    697                      install_path='${CTDB_TEST_LIBDIR}')
     698                     install_path='${CTDB_TEST_LIBEXECDIR}')
    698699
    699700    if bld.env.HAVE_INFINIBAND:
     
    703704                         deps='replace talloc ctdb-client ctdb-common' +
    704705                              ib_deps,
    705                          install_path='${CTDB_TEST_LIBDIR}')
     706                         install_path='${CTDB_TEST_LIBEXECDIR}')
    706707
    707708    test_subdirs = [
     
    735736
    736737    sed_expr = 's@^TEST_SCRIPTS_DIR=.*@&\\nexport TEST_BIN_DIR=\"%s\"@' % (
    737                bld.env.CTDB_TEST_LIBDIR)
     738               bld.env.CTDB_TEST_LIBEXECDIR)
    738739    bld.SAMBA_GENERATOR('ctdb-test-wrap',
    739740                        source='tests/scripts/test_wrap',
     
    744745
    745746    sed_expr1 = 's@^test_dir=.*@test_dir=%s\\nexport TEST_BIN_DIR=\"%s\"@' % (
    746                 bld.env.CTDB_TEST_DATADIR, bld.env.CTDB_TEST_LIBDIR)
     747                bld.env.CTDB_TEST_DATADIR, bld.env.CTDB_TEST_LIBEXECDIR)
    747748    sed_expr2 = 's@^\(export CTDB_TESTS_ARE_INSTALLED\)=false@\\1=true@'
    748749    bld.SAMBA_GENERATOR('ctdb-test-runner',