1 | /*
|
---|
2 | persistent store logic
|
---|
3 |
|
---|
4 | Copyright (C) Andrew Tridgell 2007
|
---|
5 | Copyright (C) Ronnie Sahlberg 2007
|
---|
6 |
|
---|
7 | This program is free software; you can redistribute it and/or modify
|
---|
8 | it under the terms of the GNU General Public License as published by
|
---|
9 | the Free Software Foundation; either version 3 of the License, or
|
---|
10 | (at your option) any later version.
|
---|
11 |
|
---|
12 | This program is distributed in the hope that it will be useful,
|
---|
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
15 | GNU General Public License for more details.
|
---|
16 |
|
---|
17 | You should have received a copy of the GNU General Public License
|
---|
18 | along with this program; if not, see <http://www.gnu.org/licenses/>.
|
---|
19 | */
|
---|
20 |
|
---|
21 | #include "replace.h"
|
---|
22 | #include "system/filesys.h"
|
---|
23 | #include "system/network.h"
|
---|
24 | #include "system/time.h"
|
---|
25 | #include "system/wait.h"
|
---|
26 |
|
---|
27 | #include <talloc.h>
|
---|
28 | #include <tevent.h>
|
---|
29 |
|
---|
30 | #include "lib/tdb_wrap/tdb_wrap.h"
|
---|
31 | #include "lib/util/debug.h"
|
---|
32 | #include "lib/util/samba_util.h"
|
---|
33 |
|
---|
34 | #include "ctdb_private.h"
|
---|
35 |
|
---|
36 | #include "common/reqid.h"
|
---|
37 | #include "common/common.h"
|
---|
38 | #include "common/logging.h"
|
---|
39 |
|
---|
/*
  Bookkeeping for one in-flight persistent store (trans3 commit):
  it records who asked for the commit and how many node replies are
  still outstanding.
*/
struct ctdb_persistent_state {
	/* daemon context used when replying to the control */
	struct ctdb_context *ctdb;
	/* database being committed to; used by trans3_commit */
	struct ctdb_db_context *ctdb_db;
	/* client that issued the commit; used by trans3_commit */
	struct ctdb_client *client;
	/* the control request that gets answered when the commit ends */
	struct ctdb_req_control_old *c;
	/* error message taken from a failed node reply, if any */
	const char *errormsg;
	/* controls sent to nodes that have not been acknowledged yet */
	uint32_t num_pending;
	/* status taken from a failed node reply */
	int32_t status;
	/* number of node replies that reported a failure */
	uint32_t num_failed;
	/* number of controls sent out to nodes */
	uint32_t num_sent;
};
|
---|
50 |
|
---|
51 | /*
|
---|
52 | 1) all nodes fail, and all nodes reply
|
---|
53 | 2) some nodes fail, all nodes reply
|
---|
54 | 3) some nodes timeout
|
---|
55 | 4) all nodes succeed
|
---|
56 | */
|
---|
57 |
|
---|
58 | /*
|
---|
59 | called when a node has acknowledged a ctdb_control_update_record call
|
---|
60 | */
|
---|
61 | static void ctdb_persistent_callback(struct ctdb_context *ctdb,
|
---|
62 | int32_t status, TDB_DATA data,
|
---|
63 | const char *errormsg,
|
---|
64 | void *private_data)
|
---|
65 | {
|
---|
66 | struct ctdb_persistent_state *state = talloc_get_type(private_data,
|
---|
67 | struct ctdb_persistent_state);
|
---|
68 |
|
---|
69 | if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
|
---|
70 | DEBUG(DEBUG_INFO, ("ctdb_persistent_callback: ignoring reply "
|
---|
71 | "during recovery\n"));
|
---|
72 | return;
|
---|
73 | }
|
---|
74 |
|
---|
75 | if (status != 0) {
|
---|
76 | DEBUG(DEBUG_ERR,("ctdb_persistent_callback failed with status %d (%s)\n",
|
---|
77 | status, errormsg?errormsg:"no error message given"));
|
---|
78 | state->status = status;
|
---|
79 | state->errormsg = errormsg;
|
---|
80 | state->num_failed++;
|
---|
81 |
|
---|
82 | /*
|
---|
83 | * If a node failed to complete the update_record control,
|
---|
84 | * then either a recovery is already running or something
|
---|
85 | * bad is going on. So trigger a recovery and let the
|
---|
86 | * recovery finish the transaction, sending back the reply
|
---|
87 | * for the trans3_commit control to the client.
|
---|
88 | */
|
---|
89 | ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
|
---|
90 | return;
|
---|
91 | }
|
---|
92 |
|
---|
93 | state->num_pending--;
|
---|
94 |
|
---|
95 | if (state->num_pending != 0) {
|
---|
96 | return;
|
---|
97 | }
|
---|
98 |
|
---|
99 | ctdb_request_control_reply(state->ctdb, state->c, NULL, 0, state->errormsg);
|
---|
100 | talloc_free(state);
|
---|
101 | }
|
---|
102 |
|
---|
103 | /*
|
---|
104 | called if persistent store times out
|
---|
105 | */
|
---|
106 | static void ctdb_persistent_store_timeout(struct tevent_context *ev,
|
---|
107 | struct tevent_timer *te,
|
---|
108 | struct timeval t, void *private_data)
|
---|
109 | {
|
---|
110 | struct ctdb_persistent_state *state = talloc_get_type(private_data, struct ctdb_persistent_state);
|
---|
111 |
|
---|
112 | if (state->ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
|
---|
113 | DEBUG(DEBUG_INFO, ("ctdb_persistent_store_timeout: ignoring "
|
---|
114 | "timeout during recovery\n"));
|
---|
115 | return;
|
---|
116 | }
|
---|
117 |
|
---|
118 | ctdb_request_control_reply(state->ctdb, state->c, NULL, 1,
|
---|
119 | "timeout in ctdb_persistent_state");
|
---|
120 |
|
---|
121 | talloc_free(state);
|
---|
122 | }
|
---|
123 |
|
---|
124 | /**
|
---|
125 | * Finish pending trans3 commit controls, i.e. send
|
---|
126 | * reply to the client. This is called by the end-recovery
|
---|
127 | * control to fix the situation when a recovery interrupts
|
---|
128 | * the usual progress of a transaction.
|
---|
129 | */
|
---|
130 | void ctdb_persistent_finish_trans3_commits(struct ctdb_context *ctdb)
|
---|
131 | {
|
---|
132 | struct ctdb_db_context *ctdb_db;
|
---|
133 |
|
---|
134 | if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
|
---|
135 | DEBUG(DEBUG_INFO, ("ctdb_persistent_finish_trans3_commits: "
|
---|
136 | "skipping execution when recovery is "
|
---|
137 | "active\n"));
|
---|
138 | return;
|
---|
139 | }
|
---|
140 |
|
---|
141 | for (ctdb_db = ctdb->db_list; ctdb_db; ctdb_db = ctdb_db->next) {
|
---|
142 | struct ctdb_persistent_state *state;
|
---|
143 |
|
---|
144 | if (ctdb_db->persistent_state == NULL) {
|
---|
145 | continue;
|
---|
146 | }
|
---|
147 |
|
---|
148 | state = ctdb_db->persistent_state;
|
---|
149 |
|
---|
150 | ctdb_request_control_reply(ctdb, state->c, NULL, 2,
|
---|
151 | "trans3 commit ended by recovery");
|
---|
152 |
|
---|
153 | /* The destructor sets ctdb_db->persistent_state to NULL. */
|
---|
154 | talloc_free(state);
|
---|
155 | }
|
---|
156 | }
|
---|
157 |
|
---|
158 | static int ctdb_persistent_state_destructor(struct ctdb_persistent_state *state)
|
---|
159 | {
|
---|
160 | if (state->client != NULL) {
|
---|
161 | state->client->db_id = 0;
|
---|
162 | }
|
---|
163 |
|
---|
164 | if (state->ctdb_db != NULL) {
|
---|
165 | state->ctdb_db->persistent_state = NULL;
|
---|
166 | }
|
---|
167 |
|
---|
168 | return 0;
|
---|
169 | }
|
---|
170 |
|
---|
171 | /*
|
---|
172 | * Store a set of persistent records.
|
---|
173 | * This is used to roll out a transaction to all nodes.
|
---|
174 | */
|
---|
175 | int32_t ctdb_control_trans3_commit(struct ctdb_context *ctdb,
|
---|
176 | struct ctdb_req_control_old *c,
|
---|
177 | TDB_DATA recdata, bool *async_reply)
|
---|
178 | {
|
---|
179 | struct ctdb_client *client;
|
---|
180 | struct ctdb_persistent_state *state;
|
---|
181 | int i;
|
---|
182 | struct ctdb_marshall_buffer *m = (struct ctdb_marshall_buffer *)recdata.dptr;
|
---|
183 | struct ctdb_db_context *ctdb_db;
|
---|
184 |
|
---|
185 | if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
|
---|
186 | DEBUG(DEBUG_INFO,("rejecting ctdb_control_trans3_commit when recovery active\n"));
|
---|
187 | return -1;
|
---|
188 | }
|
---|
189 |
|
---|
190 | client = reqid_find(ctdb->idr, c->client_id, struct ctdb_client);
|
---|
191 | if (client == NULL) {
|
---|
192 | DEBUG(DEBUG_ERR,(__location__ " can not match persistent_store "
|
---|
193 | "to a client. Returning error\n"));
|
---|
194 | return -1;
|
---|
195 | }
|
---|
196 |
|
---|
197 | if (client->db_id != 0) {
|
---|
198 | DEBUG(DEBUG_ERR,(__location__ " ERROR: trans3_commit: "
|
---|
199 | "client-db_id[0x%08x] != 0 "
|
---|
200 | "(client_id[0x%08x]): trans3_commit active?\n",
|
---|
201 | client->db_id, client->client_id));
|
---|
202 | return -1;
|
---|
203 | }
|
---|
204 |
|
---|
205 | ctdb_db = find_ctdb_db(ctdb, m->db_id);
|
---|
206 | if (ctdb_db == NULL) {
|
---|
207 | DEBUG(DEBUG_ERR,(__location__ " ctdb_control_trans3_commit: "
|
---|
208 | "Unknown database db_id[0x%08x]\n", m->db_id));
|
---|
209 | return -1;
|
---|
210 | }
|
---|
211 |
|
---|
212 | if (ctdb_db->persistent_state != NULL) {
|
---|
213 | DEBUG(DEBUG_ERR, (__location__ " Error: "
|
---|
214 | "ctdb_control_trans3_commit "
|
---|
215 | "called while a transaction commit is "
|
---|
216 | "active. db_id[0x%08x]\n", m->db_id));
|
---|
217 | return -1;
|
---|
218 | }
|
---|
219 |
|
---|
220 | ctdb_db->persistent_state = talloc_zero(ctdb_db,
|
---|
221 | struct ctdb_persistent_state);
|
---|
222 | CTDB_NO_MEMORY(ctdb, ctdb_db->persistent_state);
|
---|
223 |
|
---|
224 | client->db_id = m->db_id;
|
---|
225 |
|
---|
226 | state = ctdb_db->persistent_state;
|
---|
227 | state->ctdb = ctdb;
|
---|
228 | state->ctdb_db = ctdb_db;
|
---|
229 | state->c = c;
|
---|
230 | state->client = client;
|
---|
231 |
|
---|
232 | talloc_set_destructor(state, ctdb_persistent_state_destructor);
|
---|
233 |
|
---|
234 | for (i = 0; i < ctdb->vnn_map->size; i++) {
|
---|
235 | struct ctdb_node *node = ctdb->nodes[ctdb->vnn_map->map[i]];
|
---|
236 | int ret;
|
---|
237 |
|
---|
238 | /* only send to active nodes */
|
---|
239 | if (node->flags & NODE_FLAGS_INACTIVE) {
|
---|
240 | continue;
|
---|
241 | }
|
---|
242 |
|
---|
243 | ret = ctdb_daemon_send_control(ctdb, node->pnn, 0,
|
---|
244 | CTDB_CONTROL_UPDATE_RECORD,
|
---|
245 | c->client_id, 0, recdata,
|
---|
246 | ctdb_persistent_callback,
|
---|
247 | state);
|
---|
248 | if (ret == -1) {
|
---|
249 | DEBUG(DEBUG_ERR,("Unable to send "
|
---|
250 | "CTDB_CONTROL_UPDATE_RECORD "
|
---|
251 | "to pnn %u\n", node->pnn));
|
---|
252 | talloc_free(state);
|
---|
253 | return -1;
|
---|
254 | }
|
---|
255 |
|
---|
256 | state->num_pending++;
|
---|
257 | state->num_sent++;
|
---|
258 | }
|
---|
259 |
|
---|
260 | if (state->num_pending == 0) {
|
---|
261 | talloc_free(state);
|
---|
262 | return 0;
|
---|
263 | }
|
---|
264 |
|
---|
265 | /* we need to wait for the replies */
|
---|
266 | *async_reply = true;
|
---|
267 |
|
---|
268 | /* need to keep the control structure around */
|
---|
269 | talloc_steal(state, c);
|
---|
270 |
|
---|
271 | /* but we won't wait forever */
|
---|
272 | tevent_add_timer(ctdb->ev, state,
|
---|
273 | timeval_current_ofs(ctdb->tunable.control_timeout, 0),
|
---|
274 | ctdb_persistent_store_timeout, state);
|
---|
275 |
|
---|
276 | return 0;
|
---|
277 | }
|
---|
278 |
|
---|
279 |
|
---|
280 | /*
|
---|
281 | backwards compatibility:
|
---|
282 |
|
---|
283 | start a persistent store operation. passing both the key, header and
|
---|
284 | data to the daemon. If the client disconnects before it has issued
|
---|
285 | a persistent_update call to the daemon we trigger a full recovery
|
---|
286 | to ensure the databases are brought back in sync.
|
---|
287 | for now we ignore the recdata that the client has passed to us.
|
---|
288 | */
|
---|
289 | int32_t ctdb_control_start_persistent_update(struct ctdb_context *ctdb,
|
---|
290 | struct ctdb_req_control_old *c,
|
---|
291 | TDB_DATA recdata)
|
---|
292 | {
|
---|
293 | struct ctdb_client *client = reqid_find(ctdb->idr, c->client_id, struct ctdb_client);
|
---|
294 |
|
---|
295 | if (client == NULL) {
|
---|
296 | DEBUG(DEBUG_ERR,(__location__ " can not match start_persistent_update to a client. Returning error\n"));
|
---|
297 | return -1;
|
---|
298 | }
|
---|
299 |
|
---|
300 | client->num_persistent_updates++;
|
---|
301 |
|
---|
302 | return 0;
|
---|
303 | }
|
---|
304 |
|
---|
305 | /*
|
---|
306 | backwards compatibility:
|
---|
307 |
|
---|
308 | called to tell ctdbd that it is no longer doing a persistent update
|
---|
309 | */
|
---|
310 | int32_t ctdb_control_cancel_persistent_update(struct ctdb_context *ctdb,
|
---|
311 | struct ctdb_req_control_old *c,
|
---|
312 | TDB_DATA recdata)
|
---|
313 | {
|
---|
314 | struct ctdb_client *client = reqid_find(ctdb->idr, c->client_id, struct ctdb_client);
|
---|
315 |
|
---|
316 | if (client == NULL) {
|
---|
317 | DEBUG(DEBUG_ERR,(__location__ " can not match cancel_persistent_update to a client. Returning error\n"));
|
---|
318 | return -1;
|
---|
319 | }
|
---|
320 |
|
---|
321 | if (client->num_persistent_updates > 0) {
|
---|
322 | client->num_persistent_updates--;
|
---|
323 | }
|
---|
324 |
|
---|
325 | return 0;
|
---|
326 | }
|
---|
327 |
|
---|
328 | static int32_t ctdb_get_db_seqnum(struct ctdb_context *ctdb,
|
---|
329 | uint32_t db_id,
|
---|
330 | uint64_t *seqnum)
|
---|
331 | {
|
---|
332 | int32_t ret;
|
---|
333 | struct ctdb_db_context *ctdb_db;
|
---|
334 | const char *keyname = CTDB_DB_SEQNUM_KEY;
|
---|
335 | TDB_DATA key;
|
---|
336 | TDB_DATA data;
|
---|
337 | TALLOC_CTX *mem_ctx = talloc_new(ctdb);
|
---|
338 | struct ctdb_ltdb_header header;
|
---|
339 |
|
---|
340 | ctdb_db = find_ctdb_db(ctdb, db_id);
|
---|
341 | if (!ctdb_db) {
|
---|
342 | DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%08x\n", db_id));
|
---|
343 | ret = -1;
|
---|
344 | goto done;
|
---|
345 | }
|
---|
346 |
|
---|
347 | key.dptr = (uint8_t *)discard_const(keyname);
|
---|
348 | key.dsize = strlen(keyname) + 1;
|
---|
349 |
|
---|
350 | ret = (int32_t)ctdb_ltdb_fetch(ctdb_db, key, &header, mem_ctx, &data);
|
---|
351 | if (ret != 0) {
|
---|
352 | goto done;
|
---|
353 | }
|
---|
354 |
|
---|
355 | if (data.dsize != sizeof(uint64_t)) {
|
---|
356 | *seqnum = 0;
|
---|
357 | goto done;
|
---|
358 | }
|
---|
359 |
|
---|
360 | *seqnum = *(uint64_t *)data.dptr;
|
---|
361 |
|
---|
362 | done:
|
---|
363 | talloc_free(mem_ctx);
|
---|
364 | return ret;
|
---|
365 | }
|
---|
366 |
|
---|
367 | /**
|
---|
368 | * Get the sequence number of a persistent database.
|
---|
369 | */
|
---|
370 | int32_t ctdb_control_get_db_seqnum(struct ctdb_context *ctdb,
|
---|
371 | TDB_DATA indata,
|
---|
372 | TDB_DATA *outdata)
|
---|
373 | {
|
---|
374 | uint32_t db_id;
|
---|
375 | int32_t ret;
|
---|
376 | uint64_t seqnum;
|
---|
377 |
|
---|
378 | db_id = *(uint32_t *)indata.dptr;
|
---|
379 | ret = ctdb_get_db_seqnum(ctdb, db_id, &seqnum);
|
---|
380 | if (ret != 0) {
|
---|
381 | goto done;
|
---|
382 | }
|
---|
383 |
|
---|
384 | outdata->dsize = sizeof(uint64_t);
|
---|
385 | outdata->dptr = talloc_memdup(outdata, &seqnum, sizeof(uint64_t));
|
---|
386 | if (outdata->dptr == NULL) {
|
---|
387 | ret = -1;
|
---|
388 | }
|
---|
389 |
|
---|
390 | done:
|
---|
391 | return ret;
|
---|
392 | }
|
---|