1 | /*
|
---|
2 | * Unix SMB/CIFS implementation.
|
---|
3 | * Support for OneFS system interfaces.
|
---|
4 | *
|
---|
5 | * Copyright (C) Tim Prouty, 2008
|
---|
6 | *
|
---|
7 | * This program is free software; you can redistribute it and/or modify
|
---|
8 | * it under the terms of the GNU General Public License as published by
|
---|
9 | * the Free Software Foundation; either version 3 of the License, or
|
---|
10 | * (at your option) any later version.
|
---|
11 | *
|
---|
12 | * This program is distributed in the hope that it will be useful,
|
---|
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
15 | * GNU General Public License for more details.
|
---|
16 | *
|
---|
17 | * You should have received a copy of the GNU General Public License
|
---|
18 | * along with this program; if not, see <http://www.gnu.org/licenses/>.
|
---|
19 | */
|
---|
20 |
|
---|
21 | #include "includes.h"
|
---|
22 | #include "onefs.h"
|
---|
23 | #include "onefs_config.h"
|
---|
24 | #include "oplock_onefs.h"
|
---|
25 |
|
---|
26 | #include <ifs/ifs_syscalls.h>
|
---|
27 | #include <isi_acl/isi_acl_util.h>
|
---|
28 | #include <sys/isi_acl.h>
|
---|
29 |
|
---|
30 | /*
|
---|
31 | * Initialize the sm_lock struct before passing it to ifs_createfile.
|
---|
32 | */
|
---|
33 | static void smlock_init(connection_struct *conn, struct sm_lock *sml,
|
---|
34 | bool isexe, uint32_t access_mask, uint32_t share_access,
|
---|
35 | uint32_t create_options)
|
---|
36 | {
|
---|
37 | sml->sm_type.doc = false;
|
---|
38 | sml->sm_type.isexe = isexe;
|
---|
39 | sml->sm_type.statonly = is_stat_open(access_mask);
|
---|
40 | sml->sm_type.access_mask = access_mask;
|
---|
41 | sml->sm_type.share_access = share_access;
|
---|
42 |
|
---|
43 | /*
|
---|
44 | * private_options was previously used for DENY_DOS/DENY_FCB checks in
|
---|
45 | * the kernel, but are now properly handled by fcb_or_dos_open. In
|
---|
46 | * these cases, ifs_createfile will return a sharing violation, which
|
---|
47 | * gives fcb_or_dos_open the chance to open a duplicate file handle.
|
---|
48 | */
|
---|
49 | sml->sm_type.private_options = 0;
|
---|
50 |
|
---|
51 | /* 1 second delay is handled in onefs_open.c by deferring the open */
|
---|
52 | sml->sm_timeout = timeval_set(0, 0);
|
---|
53 | }
|
---|
54 |
|
---|
55 | static void smlock_dump(int debuglevel, const struct sm_lock *sml)
|
---|
56 | {
|
---|
57 | if (sml == NULL) {
|
---|
58 | DEBUG(debuglevel, ("sml == NULL\n"));
|
---|
59 | return;
|
---|
60 | }
|
---|
61 |
|
---|
62 | DEBUG(debuglevel,
|
---|
63 | ("smlock: doc=%s, isexec=%s, statonly=%s, access_mask=0x%x, "
|
---|
64 | "share_access=0x%x, private_options=0x%x timeout=%d/%d\n",
|
---|
65 | sml->sm_type.doc ? "True" : "False",
|
---|
66 | sml->sm_type.isexe ? "True" : "False",
|
---|
67 | sml->sm_type.statonly ? "True" : "False",
|
---|
68 | sml->sm_type.access_mask,
|
---|
69 | sml->sm_type.share_access,
|
---|
70 | sml->sm_type.private_options,
|
---|
71 | (int)sml->sm_timeout.tv_sec,
|
---|
72 | (int)sml->sm_timeout.tv_usec));
|
---|
73 | }
|
---|
74 |
|
---|
75 | /**
|
---|
76 | * External interface to ifs_createfile
|
---|
77 | */
|
---|
78 | int onefs_sys_create_file(connection_struct *conn,
|
---|
79 | int base_fd,
|
---|
80 | const char *path,
|
---|
81 | uint32_t access_mask,
|
---|
82 | uint32_t open_access_mask,
|
---|
83 | uint32_t share_access,
|
---|
84 | uint32_t create_options,
|
---|
85 | int flags,
|
---|
86 | mode_t mode,
|
---|
87 | int oplock_request,
|
---|
88 | uint64_t id,
|
---|
89 | struct security_descriptor *sd,
|
---|
90 | uint32_t dos_flags,
|
---|
91 | int *granted_oplock)
|
---|
92 | {
|
---|
93 | struct sm_lock sml, *psml = NULL;
|
---|
94 | enum oplock_type onefs_oplock;
|
---|
95 | enum oplock_type onefs_granted_oplock = OPLOCK_NONE;
|
---|
96 | struct ifs_security_descriptor ifs_sd = {}, *pifs_sd = NULL;
|
---|
97 | uint32_t sec_info_effective = 0;
|
---|
98 | int ret_fd = -1;
|
---|
99 | uint32_t onefs_dos_attributes;
|
---|
100 | struct ifs_createfile_flags cf_flags = CF_FLAGS_NONE;
|
---|
101 | char *mapped_name = NULL;
|
---|
102 | NTSTATUS result;
|
---|
103 |
|
---|
104 | START_PROFILE(syscall_createfile);
|
---|
105 |
|
---|
106 | /* Translate the name to UNIX before calling ifs_createfile */
|
---|
107 | mapped_name = talloc_strdup(talloc_tos(), path);
|
---|
108 | if (mapped_name == NULL) {
|
---|
109 | errno = ENOMEM;
|
---|
110 | goto out;
|
---|
111 | }
|
---|
112 | result = SMB_VFS_TRANSLATE_NAME(conn, &mapped_name,
|
---|
113 | vfs_translate_to_unix);
|
---|
114 | if (!NT_STATUS_IS_OK(result)) {
|
---|
115 | goto out;
|
---|
116 | }
|
---|
117 |
|
---|
118 | /* Setup security descriptor and get secinfo. */
|
---|
119 | if (sd != NULL) {
|
---|
120 | NTSTATUS status;
|
---|
121 | uint32_t sec_info_sent = 0;
|
---|
122 |
|
---|
123 | sec_info_sent = (get_sec_info(sd) & IFS_SEC_INFO_KNOWN_MASK);
|
---|
124 |
|
---|
125 | status = onefs_samba_sd_to_sd(sec_info_sent, sd, &ifs_sd,
|
---|
126 | SNUM(conn), &sec_info_effective);
|
---|
127 |
|
---|
128 | if (!NT_STATUS_IS_OK(status)) {
|
---|
129 | DEBUG(1, ("SD initialization failure: %s\n",
|
---|
130 | nt_errstr(status)));
|
---|
131 | errno = EINVAL;
|
---|
132 | goto out;
|
---|
133 | }
|
---|
134 |
|
---|
135 | pifs_sd = &ifs_sd;
|
---|
136 | }
|
---|
137 |
|
---|
138 | /* Stripping off private bits will be done for us. */
|
---|
139 | onefs_oplock = onefs_samba_oplock_to_oplock(oplock_request);
|
---|
140 |
|
---|
141 | if (!lp_oplocks(SNUM(conn))) {
|
---|
142 | SMB_ASSERT(onefs_oplock == OPLOCK_NONE);
|
---|
143 | }
|
---|
144 |
|
---|
145 | /* Convert samba dos flags to UF_DOS_* attributes. */
|
---|
146 | onefs_dos_attributes = dos_attributes_to_stat_dos_flags(dos_flags);
|
---|
147 |
|
---|
148 | /**
|
---|
149 | * Deal with kernel creating Default ACLs. (Isilon bug 47447.)
|
---|
150 | *
|
---|
151 | * 1) "nt acl support = no", default_acl = no
|
---|
152 | * 2) "inherit permissions = yes", default_acl = no
|
---|
153 | */
|
---|
154 | if (lp_nt_acl_support(SNUM(conn)) && !lp_inherit_perms(SNUM(conn)))
|
---|
155 | cf_flags = cf_flags_or(cf_flags, CF_FLAGS_DEFAULT_ACL);
|
---|
156 |
|
---|
157 | /*
|
---|
158 | * Some customer workflows require the execute bit to be ignored.
|
---|
159 | */
|
---|
160 | if (lp_parm_bool(SNUM(conn), PARM_ONEFS_TYPE,
|
---|
161 | PARM_ALLOW_EXECUTE_ALWAYS,
|
---|
162 | PARM_ALLOW_EXECUTE_ALWAYS_DEFAULT) &&
|
---|
163 | (open_access_mask & FILE_EXECUTE)) {
|
---|
164 |
|
---|
165 | DEBUG(3, ("Stripping execute bit from %s: (0x%x)\n", mapped_name,
|
---|
166 | open_access_mask));
|
---|
167 |
|
---|
168 | /* Strip execute. */
|
---|
169 | open_access_mask &= ~FILE_EXECUTE;
|
---|
170 |
|
---|
171 | /*
|
---|
172 | * Add READ_DATA, so we're not left with desired_access=0. An
|
---|
173 | * execute call should imply the client will read the data.
|
---|
174 | */
|
---|
175 | open_access_mask |= FILE_READ_DATA;
|
---|
176 |
|
---|
177 | DEBUGADD(3, ("New stripped access mask: 0x%x\n",
|
---|
178 | open_access_mask));
|
---|
179 | }
|
---|
180 |
|
---|
181 | DEBUG(10,("onefs_sys_create_file: base_fd = %d, fname = %s "
|
---|
182 | "open_access_mask = 0x%x, flags = 0x%x, mode = 0%o, "
|
---|
183 | "desired_oplock = %s, id = 0x%x, secinfo = 0x%x, sd = %p, "
|
---|
184 | "dos_attributes = 0x%x, path = %s, "
|
---|
185 | "default_acl=%s\n", base_fd, mapped_name,
|
---|
186 | (unsigned int)open_access_mask,
|
---|
187 | (unsigned int)flags,
|
---|
188 | (unsigned int)mode,
|
---|
189 | onefs_oplock_str(onefs_oplock),
|
---|
190 | (unsigned int)id,
|
---|
191 | sec_info_effective, sd,
|
---|
192 | (unsigned int)onefs_dos_attributes, mapped_name,
|
---|
193 | cf_flags_and_bool(cf_flags, CF_FLAGS_DEFAULT_ACL) ?
|
---|
194 | "true" : "false"));
|
---|
195 |
|
---|
196 | /* Initialize smlock struct for files/dirs but not internal opens */
|
---|
197 | if (!(oplock_request & INTERNAL_OPEN_ONLY)) {
|
---|
198 | smlock_init(conn, &sml, is_executable(mapped_name), access_mask,
|
---|
199 | share_access, create_options);
|
---|
200 | psml = &sml;
|
---|
201 | }
|
---|
202 |
|
---|
203 | smlock_dump(10, psml);
|
---|
204 |
|
---|
205 | ret_fd = ifs_createfile(base_fd, mapped_name,
|
---|
206 | (enum ifs_ace_rights)open_access_mask, flags & ~O_ACCMODE, mode,
|
---|
207 | onefs_oplock, id, psml, sec_info_effective, pifs_sd,
|
---|
208 | onefs_dos_attributes, cf_flags, &onefs_granted_oplock);
|
---|
209 |
|
---|
210 | DEBUG(10,("onefs_sys_create_file(%s): ret_fd = %d, "
|
---|
211 | "onefs_granted_oplock = %s\n",
|
---|
212 | ret_fd < 0 ? strerror(errno) : "success", ret_fd,
|
---|
213 | onefs_oplock_str(onefs_granted_oplock)));
|
---|
214 |
|
---|
215 | if (granted_oplock) {
|
---|
216 | *granted_oplock =
|
---|
217 | onefs_oplock_to_samba_oplock(onefs_granted_oplock);
|
---|
218 | }
|
---|
219 |
|
---|
220 | out:
|
---|
221 | END_PROFILE(syscall_createfile);
|
---|
222 | aclu_free_sd(pifs_sd, false);
|
---|
223 | TALLOC_FREE(mapped_name);
|
---|
224 |
|
---|
225 | return ret_fd;
|
---|
226 | }
|
---|
227 |
|
---|
228 | /**
|
---|
229 | * FreeBSD based sendfile implementation that allows for atomic semantics.
|
---|
230 | */
|
---|
231 | static ssize_t onefs_sys_do_sendfile(int tofd, int fromfd,
|
---|
232 | const DATA_BLOB *header, SMB_OFF_T offset, size_t count, bool atomic)
|
---|
233 | {
|
---|
234 | size_t total=0;
|
---|
235 | struct sf_hdtr hdr;
|
---|
236 | struct iovec hdtrl;
|
---|
237 | size_t hdr_len = 0;
|
---|
238 | int flags = 0;
|
---|
239 |
|
---|
240 | if (atomic) {
|
---|
241 | flags = SF_ATOMIC;
|
---|
242 | }
|
---|
243 |
|
---|
244 | hdr.headers = &hdtrl;
|
---|
245 | hdr.hdr_cnt = 1;
|
---|
246 | hdr.trailers = NULL;
|
---|
247 | hdr.trl_cnt = 0;
|
---|
248 |
|
---|
249 | /* Set up the header iovec. */
|
---|
250 | if (header) {
|
---|
251 | hdtrl.iov_base = (void *)header->data;
|
---|
252 | hdtrl.iov_len = hdr_len = header->length;
|
---|
253 | } else {
|
---|
254 | hdtrl.iov_base = NULL;
|
---|
255 | hdtrl.iov_len = 0;
|
---|
256 | }
|
---|
257 |
|
---|
258 | total = count;
|
---|
259 | while (total + hdtrl.iov_len) {
|
---|
260 | SMB_OFF_T nwritten;
|
---|
261 | int ret;
|
---|
262 |
|
---|
263 | /*
|
---|
264 | * FreeBSD sendfile returns 0 on success, -1 on error.
|
---|
265 | * Remember, the tofd and fromfd are reversed..... :-).
|
---|
266 | * nwritten includes the header data sent.
|
---|
267 | */
|
---|
268 |
|
---|
269 | do {
|
---|
270 | ret = sendfile(fromfd, tofd, offset, total, &hdr,
|
---|
271 | &nwritten, flags);
|
---|
272 | } while (ret == -1 && errno == EINTR);
|
---|
273 |
|
---|
274 | /* On error we're done. */
|
---|
275 | if (ret == -1) {
|
---|
276 | return -1;
|
---|
277 | }
|
---|
278 |
|
---|
279 | /*
|
---|
280 | * If this was an ATOMIC sendfile, nwritten doesn't
|
---|
281 | * necessarily indicate an error. It could mean count > than
|
---|
282 | * what sendfile can handle atomically (usually 64K) or that
|
---|
283 | * there was a short read due to the file being truncated.
|
---|
284 | */
|
---|
285 | if (nwritten == 0) {
|
---|
286 | return atomic ? 0 : -1;
|
---|
287 | }
|
---|
288 |
|
---|
289 | /*
|
---|
290 | * An atomic sendfile should never send partial data!
|
---|
291 | */
|
---|
292 | if (atomic && nwritten != total + hdtrl.iov_len) {
|
---|
293 | DEBUG(0,("Atomic sendfile() sent partial data: "
|
---|
294 | "%llu of %d\n", nwritten,
|
---|
295 | total + hdtrl.iov_len));
|
---|
296 | return -1;
|
---|
297 | }
|
---|
298 |
|
---|
299 | /*
|
---|
300 | * If this was a short (signal interrupted) write we may need
|
---|
301 | * to subtract it from the header data, or null out the header
|
---|
302 | * data altogether if we wrote more than hdtrl.iov_len bytes.
|
---|
303 | * We change nwritten to be the number of file bytes written.
|
---|
304 | */
|
---|
305 |
|
---|
306 | if (hdtrl.iov_base && hdtrl.iov_len) {
|
---|
307 | if (nwritten >= hdtrl.iov_len) {
|
---|
308 | nwritten -= hdtrl.iov_len;
|
---|
309 | hdtrl.iov_base = NULL;
|
---|
310 | hdtrl.iov_len = 0;
|
---|
311 | } else {
|
---|
312 | hdtrl.iov_base =
|
---|
313 | (void *)((caddr_t)hdtrl.iov_base + nwritten);
|
---|
314 | hdtrl.iov_len -= nwritten;
|
---|
315 | nwritten = 0;
|
---|
316 | }
|
---|
317 | }
|
---|
318 | total -= nwritten;
|
---|
319 | offset += nwritten;
|
---|
320 | }
|
---|
321 | return count + hdr_len;
|
---|
322 | }
|
---|
323 |
|
---|
324 | /**
|
---|
325 | * Handles the subtleties of using sendfile with CIFS.
|
---|
326 | */
|
---|
327 | ssize_t onefs_sys_sendfile(connection_struct *conn, int tofd, int fromfd,
|
---|
328 | const DATA_BLOB *header, SMB_OFF_T offset,
|
---|
329 | size_t count)
|
---|
330 | {
|
---|
331 | bool atomic = false;
|
---|
332 | ssize_t ret = 0;
|
---|
333 |
|
---|
334 | START_PROFILE_BYTES(syscall_sendfile, count);
|
---|
335 |
|
---|
336 | if (lp_parm_bool(SNUM(conn), PARM_ONEFS_TYPE,
|
---|
337 | PARM_ATOMIC_SENDFILE,
|
---|
338 | PARM_ATOMIC_SENDFILE_DEFAULT)) {
|
---|
339 | atomic = true;
|
---|
340 | }
|
---|
341 |
|
---|
342 | /* Try the sendfile */
|
---|
343 | ret = onefs_sys_do_sendfile(tofd, fromfd, header, offset, count,
|
---|
344 | atomic);
|
---|
345 |
|
---|
346 | /* If the sendfile wasn't atomic, we're done. */
|
---|
347 | if (!atomic) {
|
---|
348 | DEBUG(10, ("non-atomic sendfile read %ul bytes\n", ret));
|
---|
349 | END_PROFILE(syscall_sendfile);
|
---|
350 | return ret;
|
---|
351 | }
|
---|
352 |
|
---|
353 | /*
|
---|
354 | * Atomic sendfile takes care to not write anything to the socket
|
---|
355 | * until all of the requested bytes have been read from the file.
|
---|
356 | * There are two atomic cases that need to be handled.
|
---|
357 | *
|
---|
358 | * 1. The file was truncated causing less data to be read than was
|
---|
359 | * requested. In this case, we return back to the caller to
|
---|
360 | * indicate 0 bytes were written to the socket. This should
|
---|
361 | * prompt the caller to fallback to the standard read path: read
|
---|
362 | * the data, create a header that indicates how many bytes were
|
---|
363 | * actually read, and send the header/data back to the client.
|
---|
364 | *
|
---|
365 | * This saves us from standard sendfile behavior of sending a
|
---|
366 | * header promising more data then will actually be sent. The
|
---|
367 | * only two options are to close the socket and kill the client
|
---|
368 | * connection, or write a bunch of 0s. Closing the client
|
---|
369 | * connection is bad because there could actually be multiple
|
---|
370 | * sessions multiplexed from the same client that are all dropped
|
---|
371 | * because of a truncate. Writing the remaining data as 0s also
|
---|
372 | * isn't good, because the client will have an incorrect version
|
---|
373 | * of the file. If the file is written back to the server, the 0s
|
---|
374 | * will be written back. Fortunately, atomic sendfile allows us
|
---|
375 | * to avoid making this choice in most cases.
|
---|
376 | *
|
---|
377 | * 2. One downside of atomic sendfile, is that there is a limit on
|
---|
378 | * the number of bytes that can be sent atomically. The kernel
|
---|
379 | * has a limited amount of mbuf space that it can read file data
|
---|
380 | * into without exhausting the system's mbufs, so a buffer of
|
---|
381 | * length xfsize is used. The xfsize at the time of writing this
|
---|
382 | * is 64K. xfsize bytes are read from the file, and subsequently
|
---|
383 | * written to the socket. This makes it impossible to do the
|
---|
384 | * sendfile atomically for a byte count > xfsize.
|
---|
385 | *
|
---|
386 | * To cope with large requests, atomic sendfile returns -1 with
|
---|
387 | * errno set to E2BIG. Since windows maxes out at 64K writes,
|
---|
388 | * this is currently only a concern with non-windows clients.
|
---|
389 | * Posix extensions allow the full 24bit bytecount field to be
|
---|
390 | * used in ReadAndX, and clients such as smbclient and the linux
|
---|
391 | * cifs client can request up to 16MB reads! There are a few
|
---|
392 | * options for handling large sendfile requests.
|
---|
393 | *
|
---|
394 | * a. Fall back to the standard read path. This is unacceptable
|
---|
395 | * because it would require prohibitively large mallocs.
|
---|
396 | *
|
---|
397 | * b. Fall back to using samba's fake_send_file which emulates
|
---|
398 | * the kernel sendfile in userspace. This still has the same
|
---|
399 | * problem of sending the header before all of the data has
|
---|
400 | * been read, so it doesn't buy us anything, and has worse
|
---|
401 | * performance than the kernel's zero-copy sendfile.
|
---|
402 | *
|
---|
403 | * c. Use non-atomic sendfile syscall to attempt a zero copy
|
---|
404 | * read, and hope that there isn't a short read due to
|
---|
405 | * truncation. In the case of a short read, there are two
|
---|
406 | * options:
|
---|
407 | *
|
---|
408 | * 1. Kill the client connection
|
---|
409 | *
|
---|
410 | * 2. Write zeros to the socket for the remaining bytes
|
---|
411 | * promised in the header.
|
---|
412 | *
|
---|
413 | * It is safer from a data corruption perspective to kill the
|
---|
414 | * client connection, so this is our default behavior, but if
|
---|
415 | * this causes problems this can be configured to write zeros
|
---|
416 | * via smb.conf.
|
---|
417 | */
|
---|
418 |
|
---|
419 | /* Handle case 1: short read -> truncated file. */
|
---|
420 | if (ret == 0) {
|
---|
421 | END_PROFILE(syscall_sendfile);
|
---|
422 | return ret;
|
---|
423 | }
|
---|
424 |
|
---|
425 | /* Handle case 2: large read. */
|
---|
426 | if (ret == -1 && errno == E2BIG) {
|
---|
427 |
|
---|
428 | if (!lp_parm_bool(SNUM(conn), PARM_ONEFS_TYPE,
|
---|
429 | PARM_SENDFILE_LARGE_READS,
|
---|
430 | PARM_SENDFILE_LARGE_READS_DEFAULT)) {
|
---|
431 | DEBUG(3, ("Not attempting non-atomic large sendfile: "
|
---|
432 | "%lu bytes\n", count));
|
---|
433 | END_PROFILE(syscall_sendfile);
|
---|
434 | return 0;
|
---|
435 | }
|
---|
436 |
|
---|
437 | if (count < 0x10000) {
|
---|
438 | DEBUG(0, ("Count < 2^16 and E2BIG was returned! %lu\n",
|
---|
439 | count));
|
---|
440 | }
|
---|
441 |
|
---|
442 | DEBUG(10, ("attempting non-atomic large sendfile: %lu bytes\n",
|
---|
443 | count));
|
---|
444 |
|
---|
445 | /* Try a non-atomic sendfile. */
|
---|
446 | ret = onefs_sys_do_sendfile(tofd, fromfd, header, offset,
|
---|
447 | count, false);
|
---|
448 | /* Real error: kill the client connection. */
|
---|
449 | if (ret == -1) {
|
---|
450 | DEBUG(1, ("error on non-atomic large sendfile "
|
---|
451 | "(%lu bytes): %s\n", count,
|
---|
452 | strerror(errno)));
|
---|
453 | END_PROFILE(syscall_sendfile);
|
---|
454 | return ret;
|
---|
455 | }
|
---|
456 |
|
---|
457 | /* Short read: kill the client connection. */
|
---|
458 | if (ret != count + header->length) {
|
---|
459 | DEBUG(1, ("short read on non-atomic large sendfile "
|
---|
460 | "(%lu of %lu bytes): %s\n", ret, count,
|
---|
461 | strerror(errno)));
|
---|
462 |
|
---|
463 | /*
|
---|
464 | * Returning ret here would cause us to drop into the
|
---|
465 | * codepath that calls sendfile_short_send, which
|
---|
466 | * sends the client a bunch of zeros instead.
|
---|
467 | * Returning -1 kills the connection.
|
---|
468 | */
|
---|
469 | if (lp_parm_bool(SNUM(conn), PARM_ONEFS_TYPE,
|
---|
470 | PARM_SENDFILE_SAFE,
|
---|
471 | PARM_SENDFILE_SAFE_DEFAULT)) {
|
---|
472 | END_PROFILE(syscall_sendfile);
|
---|
473 | return -1;
|
---|
474 | }
|
---|
475 |
|
---|
476 | END_PROFILE(syscall_sendfile);
|
---|
477 | return ret;
|
---|
478 | }
|
---|
479 |
|
---|
480 | DEBUG(10, ("non-atomic large sendfile successful\n"));
|
---|
481 | }
|
---|
482 |
|
---|
483 | /* There was error in the atomic sendfile. */
|
---|
484 | if (ret == -1) {
|
---|
485 | DEBUG(1, ("error on %s sendfile (%lu bytes): %s\n",
|
---|
486 | atomic ? "atomic" : "non-atomic",
|
---|
487 | count, strerror(errno)));
|
---|
488 | }
|
---|
489 |
|
---|
490 | END_PROFILE(syscall_sendfile);
|
---|
491 | return ret;
|
---|
492 | }
|
---|
493 |
|
---|
494 | /**
|
---|
495 | * Only talloc the spill buffer once (reallocing when necessary).
|
---|
496 | */
|
---|
497 | static char *get_spill_buffer(size_t new_count)
|
---|
498 | {
|
---|
499 | static int cur_count = 0;
|
---|
500 | static char *spill_buffer = NULL;
|
---|
501 |
|
---|
502 | /* If a sufficiently sized buffer exists, just return. */
|
---|
503 | if (new_count <= cur_count) {
|
---|
504 | SMB_ASSERT(spill_buffer);
|
---|
505 | return spill_buffer;
|
---|
506 | }
|
---|
507 |
|
---|
508 | /* Allocate the first time. */
|
---|
509 | if (cur_count == 0) {
|
---|
510 | SMB_ASSERT(!spill_buffer);
|
---|
511 | spill_buffer = talloc_array(NULL, char, new_count);
|
---|
512 | if (spill_buffer) {
|
---|
513 | cur_count = new_count;
|
---|
514 | }
|
---|
515 | return spill_buffer;
|
---|
516 | }
|
---|
517 |
|
---|
518 | /* A buffer exists, but it's not big enough, so realloc. */
|
---|
519 | SMB_ASSERT(spill_buffer);
|
---|
520 | spill_buffer = talloc_realloc(NULL, spill_buffer, char, new_count);
|
---|
521 | if (spill_buffer) {
|
---|
522 | cur_count = new_count;
|
---|
523 | }
|
---|
524 | return spill_buffer;
|
---|
525 | }
|
---|
526 |
|
---|
527 | /**
|
---|
528 | * recvfile does zero-copy writes given an fd to write to, and a socket with
|
---|
529 | * some data to write. If recvfile read more than it was able to write, it
|
---|
530 | * spills the data into a buffer. After first reading any additional data
|
---|
531 | * from the socket into the buffer, the spill buffer is then written with a
|
---|
532 | * standard pwrite.
|
---|
533 | */
|
---|
534 | ssize_t onefs_sys_recvfile(int fromfd, int tofd, SMB_OFF_T offset,
|
---|
535 | size_t count)
|
---|
536 | {
|
---|
537 | char *spill_buffer = NULL;
|
---|
538 | bool socket_drained = false;
|
---|
539 | int ret;
|
---|
540 | off_t total_rbytes = 0;
|
---|
541 | off_t total_wbytes = 0;
|
---|
542 | off_t rbytes;
|
---|
543 | off_t wbytes;
|
---|
544 |
|
---|
545 | START_PROFILE_BYTES(syscall_recvfile, count);
|
---|
546 |
|
---|
547 | DEBUG(10,("onefs_recvfile: from = %d, to = %d, offset=%llu, count = "
|
---|
548 | "%lu\n", fromfd, tofd, offset, count));
|
---|
549 |
|
---|
550 | if (count == 0) {
|
---|
551 | END_PROFILE(syscall_recvfile);
|
---|
552 | return 0;
|
---|
553 | }
|
---|
554 |
|
---|
555 | /*
|
---|
556 | * Setup up a buffer for recvfile to spill data that has been read
|
---|
557 | * from the socket but not written.
|
---|
558 | */
|
---|
559 | spill_buffer = get_spill_buffer(count);
|
---|
560 | if (spill_buffer == NULL) {
|
---|
561 | ret = -1;
|
---|
562 | goto out;
|
---|
563 | }
|
---|
564 |
|
---|
565 | /*
|
---|
566 | * Keep trying recvfile until:
|
---|
567 | * - There is no data left to read on the socket, or
|
---|
568 | * - bytes read != bytes written, or
|
---|
569 | * - An error is returned that isn't EINTR/EAGAIN
|
---|
570 | */
|
---|
571 | do {
|
---|
572 | /* Keep track of bytes read/written for recvfile */
|
---|
573 | rbytes = 0;
|
---|
574 | wbytes = 0;
|
---|
575 |
|
---|
576 | DEBUG(10, ("calling recvfile loop, offset + total_wbytes = "
|
---|
577 | "%llu, count - total_rbytes = %llu\n",
|
---|
578 | offset + total_wbytes, count - total_rbytes));
|
---|
579 |
|
---|
580 | ret = recvfile(tofd, fromfd, offset + total_wbytes,
|
---|
581 | count - total_wbytes, &rbytes, &wbytes, 0,
|
---|
582 | spill_buffer);
|
---|
583 |
|
---|
584 | DEBUG(10, ("recvfile ret = %d, errno = %d, rbytes = %llu, "
|
---|
585 | "wbytes = %llu\n", ret, ret >= 0 ? 0 : errno,
|
---|
586 | rbytes, wbytes));
|
---|
587 |
|
---|
588 | /* Update our progress so far */
|
---|
589 | total_rbytes += rbytes;
|
---|
590 | total_wbytes += wbytes;
|
---|
591 |
|
---|
592 | } while ((count - total_rbytes) && (rbytes == wbytes) &&
|
---|
593 | (ret == -1 && (errno == EINTR || errno == EAGAIN)));
|
---|
594 |
|
---|
595 | DEBUG(10, ("total_rbytes = %llu, total_wbytes = %llu\n",
|
---|
596 | total_rbytes, total_wbytes));
|
---|
597 |
|
---|
598 | /* Log if recvfile didn't write everything it read. */
|
---|
599 | if (total_rbytes != total_wbytes) {
|
---|
600 | DEBUG(3, ("partial recvfile: total_rbytes=%llu but "
|
---|
601 | "total_wbytes=%llu, diff = %llu\n", total_rbytes,
|
---|
602 | total_wbytes, total_rbytes - total_wbytes));
|
---|
603 | SMB_ASSERT(total_rbytes > total_wbytes);
|
---|
604 | }
|
---|
605 |
|
---|
606 | /*
|
---|
607 | * If there is still data on the socket, read it off.
|
---|
608 | */
|
---|
609 | while (total_rbytes < count) {
|
---|
610 |
|
---|
611 | DEBUG(3, ("shallow recvfile (%s), reading %llu\n",
|
---|
612 | strerror(errno), count - total_rbytes));
|
---|
613 |
|
---|
614 | /*
|
---|
615 | * Read the remaining data into the spill buffer. recvfile
|
---|
616 | * may already have some data in the spill buffer, so start
|
---|
617 | * filling the buffer at total_rbytes - total_wbytes.
|
---|
618 | */
|
---|
619 | ret = sys_read(fromfd,
|
---|
620 | spill_buffer + (total_rbytes - total_wbytes),
|
---|
621 | count - total_rbytes);
|
---|
622 |
|
---|
623 | if (ret <= 0) {
|
---|
624 | if (ret == 0) {
|
---|
625 | DEBUG(0, ("shallow recvfile read: EOF\n"));
|
---|
626 | } else {
|
---|
627 | DEBUG(0, ("shallow recvfile read failed: %s\n",
|
---|
628 | strerror(errno)));
|
---|
629 | }
|
---|
630 | /* Socket is dead, so treat as if it were drained. */
|
---|
631 | socket_drained = true;
|
---|
632 | goto out;
|
---|
633 | }
|
---|
634 |
|
---|
635 | /* Data was read so update the rbytes */
|
---|
636 | total_rbytes += ret;
|
---|
637 | }
|
---|
638 |
|
---|
639 | if (total_rbytes != count) {
|
---|
640 | smb_panic("Unread recvfile data still on the socket!");
|
---|
641 | }
|
---|
642 |
|
---|
643 | /*
|
---|
644 | * Now write any spilled data + the extra data read off the socket.
|
---|
645 | */
|
---|
646 | while (total_wbytes < count) {
|
---|
647 |
|
---|
648 | DEBUG(3, ("partial recvfile, writing %llu\n", count - total_wbytes));
|
---|
649 |
|
---|
650 | ret = sys_pwrite(tofd, spill_buffer, count - total_wbytes,
|
---|
651 | offset + total_wbytes);
|
---|
652 |
|
---|
653 | if (ret == -1) {
|
---|
654 | DEBUG(0, ("partial recvfile write failed: %s\n",
|
---|
655 | strerror(errno)));
|
---|
656 | goto out;
|
---|
657 | }
|
---|
658 |
|
---|
659 | /* Data was written so update the wbytes */
|
---|
660 | total_wbytes += ret;
|
---|
661 | }
|
---|
662 |
|
---|
663 | /* Success! */
|
---|
664 | ret = total_wbytes;
|
---|
665 |
|
---|
666 | out:
|
---|
667 |
|
---|
668 | END_PROFILE(syscall_recvfile);
|
---|
669 |
|
---|
670 | /* Make sure we always try to drain the socket. */
|
---|
671 | if (!socket_drained && count - total_rbytes) {
|
---|
672 | int saved_errno = errno;
|
---|
673 |
|
---|
674 | if (drain_socket(fromfd, count - total_rbytes) !=
|
---|
675 | count - total_rbytes) {
|
---|
676 | /* Socket is dead! */
|
---|
677 | DEBUG(0, ("drain socket failed: %d\n", errno));
|
---|
678 | }
|
---|
679 | errno = saved_errno;
|
---|
680 | }
|
---|
681 |
|
---|
682 | return ret;
|
---|
683 | }
|
---|
684 |
|
---|
685 | void init_stat_ex_from_onefs_stat(struct stat_ex *dst, const struct stat *src)
|
---|
686 | {
|
---|
687 | ZERO_STRUCT(*dst);
|
---|
688 |
|
---|
689 | dst->st_ex_dev = src->st_dev;
|
---|
690 | dst->st_ex_ino = src->st_ino;
|
---|
691 | dst->st_ex_mode = src->st_mode;
|
---|
692 | dst->st_ex_nlink = src->st_nlink;
|
---|
693 | dst->st_ex_uid = src->st_uid;
|
---|
694 | dst->st_ex_gid = src->st_gid;
|
---|
695 | dst->st_ex_rdev = src->st_rdev;
|
---|
696 | dst->st_ex_size = src->st_size;
|
---|
697 | dst->st_ex_atime = src->st_atimespec;
|
---|
698 | dst->st_ex_mtime = src->st_mtimespec;
|
---|
699 | dst->st_ex_ctime = src->st_ctimespec;
|
---|
700 | dst->st_ex_btime = src->st_birthtimespec;
|
---|
701 | dst->st_ex_blksize = src->st_blksize;
|
---|
702 | dst->st_ex_blocks = src->st_blocks;
|
---|
703 |
|
---|
704 | dst->st_ex_flags = src->st_flags;
|
---|
705 |
|
---|
706 | dst->vfs_private = src->st_snapid;
|
---|
707 | }
|
---|
708 |
|
---|
709 | int onefs_sys_stat(const char *fname, SMB_STRUCT_STAT *sbuf)
|
---|
710 | {
|
---|
711 | int ret;
|
---|
712 | struct stat onefs_sbuf;
|
---|
713 |
|
---|
714 | ret = stat(fname, &onefs_sbuf);
|
---|
715 |
|
---|
716 | if (ret == 0) {
|
---|
717 | /* we always want directories to appear zero size */
|
---|
718 | if (S_ISDIR(onefs_sbuf.st_mode)) {
|
---|
719 | onefs_sbuf.st_size = 0;
|
---|
720 | }
|
---|
721 | init_stat_ex_from_onefs_stat(sbuf, &onefs_sbuf);
|
---|
722 | }
|
---|
723 | return ret;
|
---|
724 | }
|
---|
725 |
|
---|
726 | int onefs_sys_fstat(int fd, SMB_STRUCT_STAT *sbuf)
|
---|
727 | {
|
---|
728 | int ret;
|
---|
729 | struct stat onefs_sbuf;
|
---|
730 |
|
---|
731 | ret = fstat(fd, &onefs_sbuf);
|
---|
732 |
|
---|
733 | if (ret == 0) {
|
---|
734 | /* we always want directories to appear zero size */
|
---|
735 | if (S_ISDIR(onefs_sbuf.st_mode)) {
|
---|
736 | onefs_sbuf.st_size = 0;
|
---|
737 | }
|
---|
738 | init_stat_ex_from_onefs_stat(sbuf, &onefs_sbuf);
|
---|
739 | }
|
---|
740 | return ret;
|
---|
741 | }
|
---|
742 |
|
---|
743 | int onefs_sys_fstat_at(int base_fd, const char *fname, SMB_STRUCT_STAT *sbuf,
|
---|
744 | int flags)
|
---|
745 | {
|
---|
746 | int ret;
|
---|
747 | struct stat onefs_sbuf;
|
---|
748 |
|
---|
749 | ret = enc_fstatat(base_fd, fname, ENC_DEFAULT, &onefs_sbuf, flags);
|
---|
750 |
|
---|
751 | if (ret == 0) {
|
---|
752 | /* we always want directories to appear zero size */
|
---|
753 | if (S_ISDIR(onefs_sbuf.st_mode)) {
|
---|
754 | onefs_sbuf.st_size = 0;
|
---|
755 | }
|
---|
756 | init_stat_ex_from_onefs_stat(sbuf, &onefs_sbuf);
|
---|
757 | }
|
---|
758 | return ret;
|
---|
759 | }
|
---|
760 |
|
---|
761 | int onefs_sys_lstat(const char *fname, SMB_STRUCT_STAT *sbuf)
|
---|
762 | {
|
---|
763 | int ret;
|
---|
764 | struct stat onefs_sbuf;
|
---|
765 |
|
---|
766 | ret = lstat(fname, &onefs_sbuf);
|
---|
767 |
|
---|
768 | if (ret == 0) {
|
---|
769 | /* we always want directories to appear zero size */
|
---|
770 | if (S_ISDIR(onefs_sbuf.st_mode)) {
|
---|
771 | onefs_sbuf.st_size = 0;
|
---|
772 | }
|
---|
773 | init_stat_ex_from_onefs_stat(sbuf, &onefs_sbuf);
|
---|
774 | }
|
---|
775 | return ret;
|
---|
776 | }
|
---|
777 |
|
---|