source: vendor/current/ctdb/common/system_linux.c

Last change on this file was 989, checked in by Silvan Scherrer, 9 years ago

Samba Server: update vendor to version 4.4.7

File size: 15.8 KB
Line 
1/*
2 ctdb system specific code to manage raw sockets on linux
3
4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
19*/
20
21#include "replace.h"
22#include "system/network.h"
23#include "system/filesys.h"
24#include "system/wait.h"
25
26#include "lib/util/debug.h"
27
28#include "protocol/protocol.h"
29
30#include <netinet/if_ether.h>
31#include <netinet/ip6.h>
32#include <netinet/icmp6.h>
33#include <net/if_arp.h>
34#include <netpacket/packet.h>
35#include <sys/prctl.h>
36
37#include "common/logging.h"
38#include "common/system.h"
39
40#ifndef ETHERTYPE_IP6
41#define ETHERTYPE_IP6 0x86dd
42#endif
43
/*
  Checksum an upper-layer payload that is carried over IPv6.

  data - start of the payload to be summed
  n    - number of payload bytes at data
  ip6  - IPv6 header supplying the pseudo-header fields (source and
         destination address, next-header value)

  The running sum covers both addresses, a pseudo-header built from
  the payload length and next-header value, and finally the payload
  itself.  The folded sum is passed through htons() and complemented;
  a result of zero is reported as 0xFFFF instead.
*/
static uint16_t tcp_checksum6(uint16_t *data, size_t n, struct ip6_hdr *ip6)
{
	uint32_t pseudo[2];
	uint32_t acc;
	uint16_t result;
	int fold;

	/* upper-layer length and next-header part of the pseudo-header */
	pseudo[0] = htonl(n);
	pseudo[1] = htonl(ip6->ip6_nxt);

	acc  = uint16_checksum((uint16_t *)(void *)&ip6->ip6_src, 16);
	acc += uint16_checksum((uint16_t *)(void *)&ip6->ip6_dst, 16);
	acc += uint16_checksum((uint16_t *)pseudo, 8);
	acc += uint16_checksum(data, n);

	/* fold the carries back into the low 16 bits, twice */
	for (fold = 0; fold < 2; fold++) {
		acc = (acc & 0xFFFF) + (acc >> 16);
	}

	result = htons(acc);
	result = ~result;

	return (result == 0) ? 0xFFFF : result;
}
71
72/*
73 send gratuitous arp reply after we have taken over an ip address
74
75 saddr is the address we are trying to claim
76 iface is the interface name we will be using to claim the address
77 */
78int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
79{
80 int s, ret;
81 struct sockaddr_ll sall;
82 struct ether_header *eh;
83 struct arphdr *ah;
84 struct ip6_hdr *ip6;
85 struct nd_neighbor_advert *nd_na;
86 struct nd_opt_hdr *nd_oh;
87 struct ifreq if_hwaddr;
88 /* Size of IPv6 neighbor advertisement (with option) */
89 unsigned char buffer[sizeof(struct ether_header) +
90 sizeof(struct ip6_hdr) +
91 sizeof(struct nd_neighbor_advert) +
92 sizeof(struct nd_opt_hdr) + ETH_ALEN];
93 char *ptr;
94 char bdcast[] = {0xff,0xff,0xff,0xff,0xff,0xff};
95 struct ifreq ifr;
96
97 ZERO_STRUCT(sall);
98 ZERO_STRUCT(ifr);
99 ZERO_STRUCT(if_hwaddr);
100
101 switch (addr->ip.sin_family) {
102 case AF_INET:
103 s = socket(AF_PACKET, SOCK_RAW, 0);
104 if (s == -1){
105 DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket\n"));
106 return -1;
107 }
108
109 DEBUG(DEBUG_DEBUG, (__location__ " Created SOCKET FD:%d for sending arp\n", s));
110 strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
111 if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
112 DEBUG(DEBUG_CRIT,(__location__ " interface '%s' not found\n", iface));
113 close(s);
114 return -1;
115 }
116
117 /* get the mac address */
118 strlcpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name));
119 ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
120 if ( ret < 0 ) {
121 close(s);
122 DEBUG(DEBUG_CRIT,(__location__ " ioctl failed\n"));
123 return -1;
124 }
125 if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
126 DEBUG(DEBUG_DEBUG,("Ignoring loopback arp request\n"));
127 close(s);
128 return 0;
129 }
130 if (if_hwaddr.ifr_hwaddr.sa_family != AF_LOCAL) {
131 close(s);
132 errno = EINVAL;
133 DEBUG(DEBUG_CRIT,(__location__ " not an ethernet address family (0x%x)\n",
134 if_hwaddr.ifr_hwaddr.sa_family));
135 return -1;
136 }
137
138
139 memset(buffer, 0 , 64);
140 eh = (struct ether_header *)buffer;
141 memset(eh->ether_dhost, 0xff, ETH_ALEN);
142 memcpy(eh->ether_shost, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
143 eh->ether_type = htons(ETHERTYPE_ARP);
144
145 ah = (struct arphdr *)&buffer[sizeof(struct ether_header)];
146 ah->ar_hrd = htons(ARPHRD_ETHER);
147 ah->ar_pro = htons(ETH_P_IP);
148 ah->ar_hln = ETH_ALEN;
149 ah->ar_pln = 4;
150
151 /* send a gratious arp */
152 ah->ar_op = htons(ARPOP_REQUEST);
153 ptr = (char *)&ah[1];
154 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
155 ptr+=ETH_ALEN;
156 memcpy(ptr, &addr->ip.sin_addr, 4);
157 ptr+=4;
158 memset(ptr, 0, ETH_ALEN);
159 ptr+=ETH_ALEN;
160 memcpy(ptr, &addr->ip.sin_addr, 4);
161 ptr+=4;
162
163 sall.sll_family = AF_PACKET;
164 sall.sll_halen = 6;
165 memcpy(&sall.sll_addr[0], bdcast, sall.sll_halen);
166 sall.sll_protocol = htons(ETH_P_ALL);
167 sall.sll_ifindex = ifr.ifr_ifindex;
168 ret = sendto(s, buffer, 64, 0, (struct sockaddr *)&sall, sizeof(sall));
169 if (ret < 0 ){
170 close(s);
171 DEBUG(DEBUG_CRIT,(__location__ " failed sendto\n"));
172 return -1;
173 }
174
175 /* send unsolicited arp reply broadcast */
176 ah->ar_op = htons(ARPOP_REPLY);
177 ptr = (char *)&ah[1];
178 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
179 ptr+=ETH_ALEN;
180 memcpy(ptr, &addr->ip.sin_addr, 4);
181 ptr+=4;
182 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
183 ptr+=ETH_ALEN;
184 memcpy(ptr, &addr->ip.sin_addr, 4);
185 ptr+=4;
186
187 ret = sendto(s, buffer, 64, 0, (struct sockaddr *)&sall, sizeof(sall));
188 if (ret < 0 ){
189 DEBUG(DEBUG_CRIT,(__location__ " failed sendto\n"));
190 close(s);
191 return -1;
192 }
193
194 close(s);
195 break;
196 case AF_INET6:
197 s = socket(AF_PACKET, SOCK_RAW, 0);
198 if (s == -1){
199 DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket\n"));
200 return -1;
201 }
202
203 DEBUG(DEBUG_DEBUG, (__location__ " Created SOCKET FD:%d for sending arp\n", s));
204 strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
205 if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
206 DEBUG(DEBUG_CRIT,(__location__ " interface '%s' not found\n", iface));
207 close(s);
208 return -1;
209 }
210
211 /* get the mac address */
212 strlcpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name));
213 ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
214 if ( ret < 0 ) {
215 close(s);
216 DEBUG(DEBUG_CRIT,(__location__ " ioctl failed\n"));
217 return -1;
218 }
219 if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
220 DEBUG(DEBUG_DEBUG,("Ignoring loopback arp request\n"));
221 close(s);
222 return 0;
223 }
224 if (if_hwaddr.ifr_hwaddr.sa_family != AF_LOCAL) {
225 close(s);
226 errno = EINVAL;
227 DEBUG(DEBUG_CRIT,(__location__ " not an ethernet address family (0x%x)\n",
228 if_hwaddr.ifr_hwaddr.sa_family));
229 return -1;
230 }
231
232 memset(buffer, 0 , sizeof(buffer));
233 eh = (struct ether_header *)buffer;
234 /* Ethernet multicast: 33:33:00:00:00:01 (see RFC2464,
235 * section 7) - note zeroes above! */
236 eh->ether_dhost[0] = eh->ether_dhost[1] = 0x33;
237 eh->ether_dhost[5] = 0x01;
238 memcpy(eh->ether_shost, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
239 eh->ether_type = htons(ETHERTYPE_IP6);
240
241 ip6 = (struct ip6_hdr *)(eh+1);
242 ip6->ip6_vfc = 0x60;
243 ip6->ip6_plen = htons(sizeof(*nd_na) +
244 sizeof(struct nd_opt_hdr) +
245 ETH_ALEN);
246 ip6->ip6_nxt = IPPROTO_ICMPV6;
247 ip6->ip6_hlim = 255;
248 ip6->ip6_src = addr->ip6.sin6_addr;
249 /* all-nodes multicast */
250
251 ret = inet_pton(AF_INET6, "ff02::1", &ip6->ip6_dst);
252 if (ret != 1) {
253 close(s);
254 DEBUG(DEBUG_CRIT,(__location__ " failed inet_pton\n"));
255 return -1;
256 }
257
258 nd_na = (struct nd_neighbor_advert *)(ip6+1);
259 nd_na->nd_na_type = ND_NEIGHBOR_ADVERT;
260 nd_na->nd_na_code = 0;
261 nd_na->nd_na_flags_reserved = ND_NA_FLAG_OVERRIDE;
262 nd_na->nd_na_target = addr->ip6.sin6_addr;
263 /* Option: Target link-layer address */
264 nd_oh = (struct nd_opt_hdr *)(nd_na+1);
265 nd_oh->nd_opt_type = ND_OPT_TARGET_LINKADDR;
266 nd_oh->nd_opt_len = 1;
267 memcpy(&(nd_oh+1)[0], if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
268
269 nd_na->nd_na_cksum = tcp_checksum6((uint16_t *)nd_na,
270 ntohs(ip6->ip6_plen), ip6);
271
272 sall.sll_family = AF_PACKET;
273 sall.sll_halen = 6;
274 memcpy(&sall.sll_addr[0], &eh->ether_dhost[0], sall.sll_halen);
275 sall.sll_protocol = htons(ETH_P_ALL);
276 sall.sll_ifindex = ifr.ifr_ifindex;
277 ret = sendto(s, buffer, sizeof(buffer),
278 0, (struct sockaddr *)&sall, sizeof(sall));
279 if (ret < 0 ){
280 close(s);
281 DEBUG(DEBUG_CRIT,(__location__ " failed sendto\n"));
282 return -1;
283 }
284
285 close(s);
286 break;
287 default:
288 DEBUG(DEBUG_CRIT,(__location__ " not an ipv4/ipv6 address (family is %u)\n", addr->ip.sin_family));
289 return -1;
290 }
291
292 return 0;
293}
294
295
/*
  Simple TCP-over-IPv4 checksum - assumes data is a multiple of 2
  bytes long.

  data - start of the TCP segment
  n    - number of bytes at data
  ip   - IPv4 header supplying source address, destination address
         and protocol for the pseudo-header part of the sum

  The folded sum is passed through htons() and complemented; a result
  of zero is reported as 0xFFFF instead.
 */
static uint16_t tcp_checksum(uint16_t *data, size_t n, struct iphdr *ip)
{
	uint32_t acc;
	uint16_t result;
	int fold;

	acc = uint16_checksum(data, n);

	/* pseudo-header: both addresses, then protocol and length */
	acc += uint16_checksum((uint16_t *)(void *)&ip->saddr,
			       sizeof(ip->saddr));
	acc += uint16_checksum((uint16_t *)(void *)&ip->daddr,
			       sizeof(ip->daddr));
	acc += ip->protocol + n;

	/* fold the carries back into the low 16 bits, twice */
	for (fold = 0; fold < 2; fold++) {
		acc = (acc & 0xFFFF) + (acc >> 16);
	}

	result = htons(acc);
	result = ~result;

	return (result == 0) ? 0xFFFF : result;
}
317
318/*
319 Send tcp segment from the specified IP/port to the specified
320 destination IP/port.
321
322 This is used to trigger the receiving host into sending its own ACK,
323 which should trigger early detection of TCP reset by the client
324 after IP takeover
325
326 This can also be used to send RST segments (if rst is true) and also
327 if correct seq and ack numbers are provided.
328 */
329int ctdb_sys_send_tcp(const ctdb_sock_addr *dest,
330 const ctdb_sock_addr *src,
331 uint32_t seq, uint32_t ack, int rst)
332{
333 int s;
334 int ret;
335 uint32_t one = 1;
336 uint16_t tmpport;
337 ctdb_sock_addr *tmpdest;
338 struct {
339 struct iphdr ip;
340 struct tcphdr tcp;
341 } ip4pkt;
342 struct {
343 struct ip6_hdr ip6;
344 struct tcphdr tcp;
345 } ip6pkt;
346
347 switch (src->ip.sin_family) {
348 case AF_INET:
349 ZERO_STRUCT(ip4pkt);
350 ip4pkt.ip.version = 4;
351 ip4pkt.ip.ihl = sizeof(ip4pkt.ip)/4;
352 ip4pkt.ip.tot_len = htons(sizeof(ip4pkt));
353 ip4pkt.ip.ttl = 255;
354 ip4pkt.ip.protocol = IPPROTO_TCP;
355 ip4pkt.ip.saddr = src->ip.sin_addr.s_addr;
356 ip4pkt.ip.daddr = dest->ip.sin_addr.s_addr;
357 ip4pkt.ip.check = 0;
358
359 ip4pkt.tcp.source = src->ip.sin_port;
360 ip4pkt.tcp.dest = dest->ip.sin_port;
361 ip4pkt.tcp.seq = seq;
362 ip4pkt.tcp.ack_seq = ack;
363 ip4pkt.tcp.ack = 1;
364 if (rst) {
365 ip4pkt.tcp.rst = 1;
366 }
367 ip4pkt.tcp.doff = sizeof(ip4pkt.tcp)/4;
368 /* this makes it easier to spot in a sniffer */
369 ip4pkt.tcp.window = htons(1234);
370 ip4pkt.tcp.check = tcp_checksum((uint16_t *)&ip4pkt.tcp, sizeof(ip4pkt.tcp), &ip4pkt.ip);
371
372 /* open a raw socket to send this segment from */
373 s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
374 if (s == -1) {
375 DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket (%s)\n",
376 strerror(errno)));
377 return -1;
378 }
379
380 ret = setsockopt(s, SOL_IP, IP_HDRINCL, &one, sizeof(one));
381 if (ret != 0) {
382 DEBUG(DEBUG_CRIT,(__location__ " failed to setup IP headers (%s)\n",
383 strerror(errno)));
384 close(s);
385 return -1;
386 }
387
388 set_nonblocking(s);
389 set_close_on_exec(s);
390
391 ret = sendto(s, &ip4pkt, sizeof(ip4pkt), 0,
392 (const struct sockaddr *)&dest->ip,
393 sizeof(dest->ip));
394 close(s);
395 if (ret != sizeof(ip4pkt)) {
396 DEBUG(DEBUG_CRIT,(__location__ " failed sendto (%s)\n", strerror(errno)));
397 return -1;
398 }
399 break;
400 case AF_INET6:
401 ZERO_STRUCT(ip6pkt);
402 ip6pkt.ip6.ip6_vfc = 0x60;
403 ip6pkt.ip6.ip6_plen = htons(20);
404 ip6pkt.ip6.ip6_nxt = IPPROTO_TCP;
405 ip6pkt.ip6.ip6_hlim = 64;
406 ip6pkt.ip6.ip6_src = src->ip6.sin6_addr;
407 ip6pkt.ip6.ip6_dst = dest->ip6.sin6_addr;
408
409 ip6pkt.tcp.source = src->ip6.sin6_port;
410 ip6pkt.tcp.dest = dest->ip6.sin6_port;
411 ip6pkt.tcp.seq = seq;
412 ip6pkt.tcp.ack_seq = ack;
413 ip6pkt.tcp.ack = 1;
414 if (rst) {
415 ip6pkt.tcp.rst = 1;
416 }
417 ip6pkt.tcp.doff = sizeof(ip6pkt.tcp)/4;
418 /* this makes it easier to spot in a sniffer */
419 ip6pkt.tcp.window = htons(1234);
420 ip6pkt.tcp.check = tcp_checksum6((uint16_t *)&ip6pkt.tcp, sizeof(ip6pkt.tcp), &ip6pkt.ip6);
421
422 s = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW);
423 if (s == -1) {
424 DEBUG(DEBUG_CRIT, (__location__ " Failed to open sending socket\n"));
425 return -1;
426
427 }
428 /* sendto() don't like if the port is set and the socket is
429 in raw mode.
430 */
431 tmpdest = discard_const(dest);
432 tmpport = tmpdest->ip6.sin6_port;
433
434 tmpdest->ip6.sin6_port = 0;
435 ret = sendto(s, &ip6pkt, sizeof(ip6pkt), 0,
436 (const struct sockaddr *)&dest->ip6,
437 sizeof(dest->ip6));
438 tmpdest->ip6.sin6_port = tmpport;
439 close(s);
440
441 if (ret != sizeof(ip6pkt)) {
442 DEBUG(DEBUG_CRIT,(__location__ " failed sendto (%s)\n", strerror(errno)));
443 return -1;
444 }
445 break;
446
447 default:
448 DEBUG(DEBUG_CRIT,(__location__ " not an ipv4/v6 address\n"));
449 return -1;
450 }
451
452 return 0;
453}
454
455/*
456 This function is used to open a raw socket to capture from
457 */
458int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
459{
460 int s;
461
462 /* Open a socket to capture all traffic */
463 s = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
464 if (s == -1) {
465 DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket\n"));
466 return -1;
467 }
468
469 DEBUG(DEBUG_DEBUG, (__location__ " Created RAW SOCKET FD:%d for tcp tickle\n", s));
470
471 set_nonblocking(s);
472 set_close_on_exec(s);
473
474 return s;
475}
476
/*
  Hook for any additional cleanup required when closing a capture
  socket.  Note that the socket itself is closed automatically in the
  caller.

  This implementation has nothing to clean up: private_data is ignored
  and 0 is always returned.
 */
int ctdb_sys_close_capture_socket(void *private_data)
{
	(void)private_data;

	return 0;
}
486
487
488/*
489 called when the raw socket becomes readable
490 */
491int ctdb_sys_read_tcp_packet(int s, void *private_data,
492 ctdb_sock_addr *src, ctdb_sock_addr *dst,
493 uint32_t *ack_seq, uint32_t *seq)
494{
495 int ret;
496#define RCVPKTSIZE 100
497 char pkt[RCVPKTSIZE];
498 struct ether_header *eth;
499 struct iphdr *ip;
500 struct ip6_hdr *ip6;
501 struct tcphdr *tcp;
502
503 ret = recv(s, pkt, RCVPKTSIZE, MSG_TRUNC);
504 if (ret < sizeof(*eth)+sizeof(*ip)) {
505 return -1;
506 }
507
508 /* Ethernet */
509 eth = (struct ether_header *)pkt;
510
511 /* we want either IPv4 or IPv6 */
512 if (ntohs(eth->ether_type) == ETHERTYPE_IP) {
513 /* IP */
514 ip = (struct iphdr *)(eth+1);
515
516 /* We only want IPv4 packets */
517 if (ip->version != 4) {
518 return -1;
519 }
520 /* Dont look at fragments */
521 if ((ntohs(ip->frag_off)&0x1fff) != 0) {
522 return -1;
523 }
524 /* we only want TCP */
525 if (ip->protocol != IPPROTO_TCP) {
526 return -1;
527 }
528
529 /* make sure its not a short packet */
530 if (offsetof(struct tcphdr, ack_seq) + 4 +
531 (ip->ihl*4) + sizeof(*eth) > ret) {
532 return -1;
533 }
534 /* TCP */
535 tcp = (struct tcphdr *)((ip->ihl*4) + (char *)ip);
536
537 /* tell the caller which one we've found */
538 src->ip.sin_family = AF_INET;
539 src->ip.sin_addr.s_addr = ip->saddr;
540 src->ip.sin_port = tcp->source;
541 dst->ip.sin_family = AF_INET;
542 dst->ip.sin_addr.s_addr = ip->daddr;
543 dst->ip.sin_port = tcp->dest;
544 *ack_seq = tcp->ack_seq;
545 *seq = tcp->seq;
546
547 return 0;
548 } else if (ntohs(eth->ether_type) == ETHERTYPE_IP6) {
549 /* IP6 */
550 ip6 = (struct ip6_hdr *)(eth+1);
551
552 /* we only want TCP */
553 if (ip6->ip6_nxt != IPPROTO_TCP) {
554 return -1;
555 }
556
557 /* TCP */
558 tcp = (struct tcphdr *)(ip6+1);
559
560 /* tell the caller which one we've found */
561 src->ip6.sin6_family = AF_INET6;
562 src->ip6.sin6_port = tcp->source;
563 src->ip6.sin6_addr = ip6->ip6_src;
564
565 dst->ip6.sin6_family = AF_INET6;
566 dst->ip6.sin6_port = tcp->dest;
567 dst->ip6.sin6_addr = ip6->ip6_dst;
568
569 *ack_seq = tcp->ack_seq;
570 *seq = tcp->seq;
571
572 return 0;
573 }
574
575 return -1;
576}
577
578
579bool ctdb_sys_check_iface_exists(const char *iface)
580{
581 int s;
582 struct ifreq ifr;
583
584 s = socket(AF_PACKET, SOCK_RAW, 0);
585 if (s == -1){
586 /* We don't know if the interface exists, so assume yes */
587 DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket\n"));
588 return true;
589 }
590
591 strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
592 if (ioctl(s, SIOCGIFINDEX, &ifr) < 0 && errno == ENODEV) {
593 DEBUG(DEBUG_CRIT,(__location__ " interface '%s' not found\n", iface));
594 close(s);
595 return false;
596 }
597 close(s);
598
599 return true;
600}
601
/*
  Look up the process id of the peer connected to a socket.

  fd       - connected socket to query with SO_PEERCRED
  peer_pid - receives the peer's pid on success; left untouched on
             failure

  Returns the result of getsockopt(): 0 on success, -1 on failure
  (with errno set by getsockopt()).
 */
int ctdb_get_peer_pid(const int fd, pid_t *peer_pid)
{
	struct ucred cr;
	socklen_t crl = sizeof(struct ucred);
	int ret;

	/* Keep the call and the comparison separate: written as
	 * "ret = getsockopt(...) == 0" the comparison result is what
	 * gets assigned, which inverts the return value. */
	ret = getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cr, &crl);
	if (ret == 0) {
		*peer_pid = cr.pid;
	}

	return ret;
}
Note: See TracBrowser for help on using the repository browser.