| 1 | /*
|
|---|
| 2 | * Unix SMB/CIFS implementation.
|
|---|
| 3 | * Support for OneFS system interfaces.
|
|---|
| 4 | *
|
|---|
| 5 | * Copyright (C) Tim Prouty, 2008
|
|---|
| 6 | *
|
|---|
| 7 | * This program is free software; you can redistribute it and/or modify
|
|---|
| 8 | * it under the terms of the GNU General Public License as published by
|
|---|
| 9 | * the Free Software Foundation; either version 3 of the License, or
|
|---|
| 10 | * (at your option) any later version.
|
|---|
| 11 | *
|
|---|
| 12 | * This program is distributed in the hope that it will be useful,
|
|---|
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|---|
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|---|
| 15 | * GNU General Public License for more details.
|
|---|
| 16 | *
|
|---|
| 17 | * You should have received a copy of the GNU General Public License
|
|---|
| 18 | * along with this program; if not, see <http://www.gnu.org/licenses/>.
|
|---|
| 19 | */
|
|---|
| 20 |
|
|---|
| 21 | #include "includes.h"
|
|---|
| 22 | #include "smbd/smbd.h"
|
|---|
| 23 | #include "onefs.h"
|
|---|
| 24 | #include "onefs_config.h"
|
|---|
| 25 | #include "oplock_onefs.h"
|
|---|
| 26 |
|
|---|
| 27 | #include <ifs/ifs_syscalls.h>
|
|---|
| 28 | #include <isi_acl/isi_acl_util.h>
|
|---|
| 29 | #include <sys/isi_acl.h>
|
|---|
| 30 |
|
|---|
| 31 | /*
|
|---|
| 32 | * Initialize the sm_lock struct before passing it to ifs_createfile.
|
|---|
| 33 | */
|
|---|
| 34 | static void smlock_init(connection_struct *conn, struct sm_lock *sml,
|
|---|
| 35 | bool isexe, uint32_t access_mask, uint32_t share_access,
|
|---|
| 36 | uint32_t create_options)
|
|---|
| 37 | {
|
|---|
| 38 | sml->sm_type.doc = false;
|
|---|
| 39 | sml->sm_type.isexe = isexe;
|
|---|
| 40 | sml->sm_type.statonly = is_stat_open(access_mask);
|
|---|
| 41 | sml->sm_type.access_mask = access_mask;
|
|---|
| 42 | sml->sm_type.share_access = share_access;
|
|---|
| 43 |
|
|---|
| 44 | /*
|
|---|
| 45 | * private_options was previously used for DENY_DOS/DENY_FCB checks in
|
|---|
| 46 | * the kernel, but are now properly handled by fcb_or_dos_open. In
|
|---|
| 47 | * these cases, ifs_createfile will return a sharing violation, which
|
|---|
| 48 | * gives fcb_or_dos_open the chance to open a duplicate file handle.
|
|---|
| 49 | */
|
|---|
| 50 | sml->sm_type.private_options = 0;
|
|---|
| 51 |
|
|---|
| 52 | /* 1 second delay is handled in onefs_open.c by deferring the open */
|
|---|
| 53 | sml->sm_timeout = timeval_set(0, 0);
|
|---|
| 54 | }
|
|---|
| 55 |
|
|---|
| 56 | static void smlock_dump(int debuglevel, const struct sm_lock *sml)
|
|---|
| 57 | {
|
|---|
| 58 | if (sml == NULL) {
|
|---|
| 59 | DEBUG(debuglevel, ("sml == NULL\n"));
|
|---|
| 60 | return;
|
|---|
| 61 | }
|
|---|
| 62 |
|
|---|
| 63 | DEBUG(debuglevel,
|
|---|
| 64 | ("smlock: doc=%s, isexec=%s, statonly=%s, access_mask=0x%x, "
|
|---|
| 65 | "share_access=0x%x, private_options=0x%x timeout=%d/%d\n",
|
|---|
| 66 | sml->sm_type.doc ? "True" : "False",
|
|---|
| 67 | sml->sm_type.isexe ? "True" : "False",
|
|---|
| 68 | sml->sm_type.statonly ? "True" : "False",
|
|---|
| 69 | sml->sm_type.access_mask,
|
|---|
| 70 | sml->sm_type.share_access,
|
|---|
| 71 | sml->sm_type.private_options,
|
|---|
| 72 | (int)sml->sm_timeout.tv_sec,
|
|---|
| 73 | (int)sml->sm_timeout.tv_usec));
|
|---|
| 74 | }
|
|---|
| 75 |
|
|---|
| 76 | /**
|
|---|
| 77 | * External interface to ifs_createfile
|
|---|
| 78 | */
|
|---|
| 79 | int onefs_sys_create_file(connection_struct *conn,
|
|---|
| 80 | int base_fd,
|
|---|
| 81 | const char *path,
|
|---|
| 82 | uint32_t access_mask,
|
|---|
| 83 | uint32_t open_access_mask,
|
|---|
| 84 | uint32_t share_access,
|
|---|
| 85 | uint32_t create_options,
|
|---|
| 86 | int flags,
|
|---|
| 87 | mode_t mode,
|
|---|
| 88 | int oplock_request,
|
|---|
| 89 | uint64_t id,
|
|---|
| 90 | struct security_descriptor *sd,
|
|---|
| 91 | uint32_t dos_flags,
|
|---|
| 92 | int *granted_oplock)
|
|---|
| 93 | {
|
|---|
| 94 | struct sm_lock sml, *psml = NULL;
|
|---|
| 95 | enum oplock_type onefs_oplock;
|
|---|
| 96 | enum oplock_type onefs_granted_oplock = OPLOCK_NONE;
|
|---|
| 97 | struct ifs_security_descriptor ifs_sd = {}, *pifs_sd = NULL;
|
|---|
| 98 | uint32_t sec_info_effective = 0;
|
|---|
| 99 | int ret_fd = -1;
|
|---|
| 100 | uint32_t onefs_dos_attributes;
|
|---|
| 101 | struct ifs_createfile_flags cf_flags = CF_FLAGS_NONE;
|
|---|
| 102 | char *mapped_name = NULL;
|
|---|
| 103 | NTSTATUS result;
|
|---|
| 104 |
|
|---|
| 105 | START_PROFILE(syscall_createfile);
|
|---|
| 106 |
|
|---|
| 107 | /* Translate the name to UNIX before calling ifs_createfile */
|
|---|
| 108 | mapped_name = talloc_strdup(talloc_tos(), path);
|
|---|
| 109 | if (mapped_name == NULL) {
|
|---|
| 110 | errno = ENOMEM;
|
|---|
| 111 | goto out;
|
|---|
| 112 | }
|
|---|
| 113 | result = SMB_VFS_TRANSLATE_NAME(conn, &mapped_name,
|
|---|
| 114 | vfs_translate_to_unix);
|
|---|
| 115 | if (!NT_STATUS_IS_OK(result)) {
|
|---|
| 116 | goto out;
|
|---|
| 117 | }
|
|---|
| 118 |
|
|---|
| 119 | /* Setup security descriptor and get secinfo. */
|
|---|
| 120 | if (sd != NULL) {
|
|---|
| 121 | NTSTATUS status;
|
|---|
| 122 | uint32_t sec_info_sent = 0;
|
|---|
| 123 |
|
|---|
| 124 | sec_info_sent = (get_sec_info(sd) & IFS_SEC_INFO_KNOWN_MASK);
|
|---|
| 125 |
|
|---|
| 126 | status = onefs_samba_sd_to_sd(sec_info_sent, sd, &ifs_sd,
|
|---|
| 127 | SNUM(conn), &sec_info_effective);
|
|---|
| 128 |
|
|---|
| 129 | if (!NT_STATUS_IS_OK(status)) {
|
|---|
| 130 | DEBUG(1, ("SD initialization failure: %s\n",
|
|---|
| 131 | nt_errstr(status)));
|
|---|
| 132 | errno = EINVAL;
|
|---|
| 133 | goto out;
|
|---|
| 134 | }
|
|---|
| 135 |
|
|---|
| 136 | pifs_sd = &ifs_sd;
|
|---|
| 137 | }
|
|---|
| 138 |
|
|---|
| 139 | /* Stripping off private bits will be done for us. */
|
|---|
| 140 | onefs_oplock = onefs_samba_oplock_to_oplock(oplock_request);
|
|---|
| 141 |
|
|---|
| 142 | if (!lp_oplocks(SNUM(conn))) {
|
|---|
| 143 | SMB_ASSERT(onefs_oplock == OPLOCK_NONE);
|
|---|
| 144 | }
|
|---|
| 145 |
|
|---|
| 146 | /* Convert samba dos flags to UF_DOS_* attributes. */
|
|---|
| 147 | onefs_dos_attributes = dos_attributes_to_stat_dos_flags(dos_flags);
|
|---|
| 148 |
|
|---|
| 149 | /**
|
|---|
| 150 | * Deal with kernel creating Default ACLs. (Isilon bug 47447.)
|
|---|
| 151 | *
|
|---|
| 152 | * 1) "nt acl support = no", default_acl = no
|
|---|
| 153 | * 2) "inherit permissions = yes", default_acl = no
|
|---|
| 154 | */
|
|---|
| 155 | if (lp_nt_acl_support(SNUM(conn)) && !lp_inherit_perms(SNUM(conn)))
|
|---|
| 156 | cf_flags = cf_flags_or(cf_flags, CF_FLAGS_DEFAULT_ACL);
|
|---|
| 157 |
|
|---|
| 158 | /*
|
|---|
| 159 | * Some customer workflows require the execute bit to be ignored.
|
|---|
| 160 | */
|
|---|
| 161 | if (lp_parm_bool(SNUM(conn), PARM_ONEFS_TYPE,
|
|---|
| 162 | PARM_ALLOW_EXECUTE_ALWAYS,
|
|---|
| 163 | PARM_ALLOW_EXECUTE_ALWAYS_DEFAULT) &&
|
|---|
| 164 | (open_access_mask & FILE_EXECUTE)) {
|
|---|
| 165 |
|
|---|
| 166 | DEBUG(3, ("Stripping execute bit from %s: (0x%x)\n", mapped_name,
|
|---|
| 167 | open_access_mask));
|
|---|
| 168 |
|
|---|
| 169 | /* Strip execute. */
|
|---|
| 170 | open_access_mask &= ~FILE_EXECUTE;
|
|---|
| 171 |
|
|---|
| 172 | /*
|
|---|
| 173 | * Add READ_DATA, so we're not left with desired_access=0. An
|
|---|
| 174 | * execute call should imply the client will read the data.
|
|---|
| 175 | */
|
|---|
| 176 | open_access_mask |= FILE_READ_DATA;
|
|---|
| 177 |
|
|---|
| 178 | DEBUGADD(3, ("New stripped access mask: 0x%x\n",
|
|---|
| 179 | open_access_mask));
|
|---|
| 180 | }
|
|---|
| 181 |
|
|---|
| 182 | DEBUG(10,("onefs_sys_create_file: base_fd = %d, fname = %s "
|
|---|
| 183 | "open_access_mask = 0x%x, flags = 0x%x, mode = 0%o, "
|
|---|
| 184 | "desired_oplock = %s, id = 0x%x, secinfo = 0x%x, sd = %p, "
|
|---|
| 185 | "dos_attributes = 0x%x, path = %s, "
|
|---|
| 186 | "default_acl=%s\n", base_fd, mapped_name,
|
|---|
| 187 | (unsigned int)open_access_mask,
|
|---|
| 188 | (unsigned int)flags,
|
|---|
| 189 | (unsigned int)mode,
|
|---|
| 190 | onefs_oplock_str(onefs_oplock),
|
|---|
| 191 | (unsigned int)id,
|
|---|
| 192 | sec_info_effective, sd,
|
|---|
| 193 | (unsigned int)onefs_dos_attributes, mapped_name,
|
|---|
| 194 | cf_flags_and_bool(cf_flags, CF_FLAGS_DEFAULT_ACL) ?
|
|---|
| 195 | "true" : "false"));
|
|---|
| 196 |
|
|---|
| 197 | /* Initialize smlock struct for files/dirs but not internal opens */
|
|---|
| 198 | if (!(oplock_request & INTERNAL_OPEN_ONLY)) {
|
|---|
| 199 | smlock_init(conn, &sml, is_executable(mapped_name), access_mask,
|
|---|
| 200 | share_access, create_options);
|
|---|
| 201 | psml = &sml;
|
|---|
| 202 | }
|
|---|
| 203 |
|
|---|
| 204 | smlock_dump(10, psml);
|
|---|
| 205 |
|
|---|
| 206 | ret_fd = ifs_createfile(base_fd, mapped_name,
|
|---|
| 207 | (enum ifs_ace_rights)open_access_mask, flags & ~O_ACCMODE, mode,
|
|---|
| 208 | onefs_oplock, id, psml, sec_info_effective, pifs_sd,
|
|---|
| 209 | onefs_dos_attributes, cf_flags, &onefs_granted_oplock);
|
|---|
| 210 |
|
|---|
| 211 | DEBUG(10,("onefs_sys_create_file(%s): ret_fd = %d, "
|
|---|
| 212 | "onefs_granted_oplock = %s\n",
|
|---|
| 213 | ret_fd < 0 ? strerror(errno) : "success", ret_fd,
|
|---|
| 214 | onefs_oplock_str(onefs_granted_oplock)));
|
|---|
| 215 |
|
|---|
| 216 | if (granted_oplock) {
|
|---|
| 217 | *granted_oplock =
|
|---|
| 218 | onefs_oplock_to_samba_oplock(onefs_granted_oplock);
|
|---|
| 219 | }
|
|---|
| 220 |
|
|---|
| 221 | out:
|
|---|
| 222 | END_PROFILE(syscall_createfile);
|
|---|
| 223 | aclu_free_sd(pifs_sd, false);
|
|---|
| 224 | TALLOC_FREE(mapped_name);
|
|---|
| 225 |
|
|---|
| 226 | return ret_fd;
|
|---|
| 227 | }
|
|---|
| 228 |
|
|---|
| 229 | /**
|
|---|
| 230 | * FreeBSD based sendfile implementation that allows for atomic semantics.
|
|---|
| 231 | */
|
|---|
| 232 | static ssize_t onefs_sys_do_sendfile(int tofd, int fromfd,
|
|---|
| 233 | const DATA_BLOB *header, SMB_OFF_T offset, size_t count, bool atomic)
|
|---|
| 234 | {
|
|---|
| 235 | size_t total=0;
|
|---|
| 236 | struct sf_hdtr hdr;
|
|---|
| 237 | struct iovec hdtrl;
|
|---|
| 238 | size_t hdr_len = 0;
|
|---|
| 239 | int flags = 0;
|
|---|
| 240 |
|
|---|
| 241 | if (atomic) {
|
|---|
| 242 | flags = SF_ATOMIC;
|
|---|
| 243 | }
|
|---|
| 244 |
|
|---|
| 245 | hdr.headers = &hdtrl;
|
|---|
| 246 | hdr.hdr_cnt = 1;
|
|---|
| 247 | hdr.trailers = NULL;
|
|---|
| 248 | hdr.trl_cnt = 0;
|
|---|
| 249 |
|
|---|
| 250 | /* Set up the header iovec. */
|
|---|
| 251 | if (header) {
|
|---|
| 252 | hdtrl.iov_base = (void *)header->data;
|
|---|
| 253 | hdtrl.iov_len = hdr_len = header->length;
|
|---|
| 254 | } else {
|
|---|
| 255 | hdtrl.iov_base = NULL;
|
|---|
| 256 | hdtrl.iov_len = 0;
|
|---|
| 257 | }
|
|---|
| 258 |
|
|---|
| 259 | total = count;
|
|---|
| 260 | while (total + hdtrl.iov_len) {
|
|---|
| 261 | SMB_OFF_T nwritten;
|
|---|
| 262 | int ret;
|
|---|
| 263 |
|
|---|
| 264 | /*
|
|---|
| 265 | * FreeBSD sendfile returns 0 on success, -1 on error.
|
|---|
| 266 | * Remember, the tofd and fromfd are reversed..... :-).
|
|---|
| 267 | * nwritten includes the header data sent.
|
|---|
| 268 | */
|
|---|
| 269 |
|
|---|
| 270 | do {
|
|---|
| 271 | ret = sendfile(fromfd, tofd, offset, total, &hdr,
|
|---|
| 272 | &nwritten, flags);
|
|---|
| 273 | #if defined(EWOULDBLOCK)
|
|---|
| 274 | } while (ret == -1 && (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK));
|
|---|
| 275 | #else
|
|---|
| 276 | } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
|
|---|
| 277 | #endif
|
|---|
| 278 |
|
|---|
| 279 | /* On error we're done. */
|
|---|
| 280 | if (ret == -1) {
|
|---|
| 281 | return -1;
|
|---|
| 282 | }
|
|---|
| 283 |
|
|---|
| 284 | /*
|
|---|
| 285 | * If this was an ATOMIC sendfile, nwritten doesn't
|
|---|
| 286 | * necessarily indicate an error. It could mean count > than
|
|---|
| 287 | * what sendfile can handle atomically (usually 64K) or that
|
|---|
| 288 | * there was a short read due to the file being truncated.
|
|---|
| 289 | */
|
|---|
| 290 | if (nwritten == 0) {
|
|---|
| 291 | return atomic ? 0 : -1;
|
|---|
| 292 | }
|
|---|
| 293 |
|
|---|
| 294 | /*
|
|---|
| 295 | * An atomic sendfile should never send partial data!
|
|---|
| 296 | */
|
|---|
| 297 | if (atomic && nwritten != total + hdtrl.iov_len) {
|
|---|
| 298 | DEBUG(0,("Atomic sendfile() sent partial data: "
|
|---|
| 299 | "%llu of %d\n", nwritten,
|
|---|
| 300 | total + hdtrl.iov_len));
|
|---|
| 301 | return -1;
|
|---|
| 302 | }
|
|---|
| 303 |
|
|---|
| 304 | /*
|
|---|
| 305 | * If this was a short (signal interrupted) write we may need
|
|---|
| 306 | * to subtract it from the header data, or null out the header
|
|---|
| 307 | * data altogether if we wrote more than hdtrl.iov_len bytes.
|
|---|
| 308 | * We change nwritten to be the number of file bytes written.
|
|---|
| 309 | */
|
|---|
| 310 |
|
|---|
| 311 | if (hdtrl.iov_base && hdtrl.iov_len) {
|
|---|
| 312 | if (nwritten >= hdtrl.iov_len) {
|
|---|
| 313 | nwritten -= hdtrl.iov_len;
|
|---|
| 314 | hdtrl.iov_base = NULL;
|
|---|
| 315 | hdtrl.iov_len = 0;
|
|---|
| 316 | } else {
|
|---|
| 317 | hdtrl.iov_base =
|
|---|
| 318 | (void *)((caddr_t)hdtrl.iov_base + nwritten);
|
|---|
| 319 | hdtrl.iov_len -= nwritten;
|
|---|
| 320 | nwritten = 0;
|
|---|
| 321 | }
|
|---|
| 322 | }
|
|---|
| 323 | total -= nwritten;
|
|---|
| 324 | offset += nwritten;
|
|---|
| 325 | }
|
|---|
| 326 | return count + hdr_len;
|
|---|
| 327 | }
|
|---|
| 328 |
|
|---|
| 329 | /**
|
|---|
| 330 | * Handles the subtleties of using sendfile with CIFS.
|
|---|
| 331 | */
|
|---|
| 332 | ssize_t onefs_sys_sendfile(connection_struct *conn, int tofd, int fromfd,
|
|---|
| 333 | const DATA_BLOB *header, SMB_OFF_T offset,
|
|---|
| 334 | size_t count)
|
|---|
| 335 | {
|
|---|
| 336 | bool atomic = false;
|
|---|
| 337 | ssize_t ret = 0;
|
|---|
| 338 |
|
|---|
| 339 | START_PROFILE_BYTES(syscall_sendfile, count);
|
|---|
| 340 |
|
|---|
| 341 | if (lp_parm_bool(SNUM(conn), PARM_ONEFS_TYPE,
|
|---|
| 342 | PARM_ATOMIC_SENDFILE,
|
|---|
| 343 | PARM_ATOMIC_SENDFILE_DEFAULT)) {
|
|---|
| 344 | atomic = true;
|
|---|
| 345 | }
|
|---|
| 346 |
|
|---|
| 347 | /* Try the sendfile */
|
|---|
| 348 | ret = onefs_sys_do_sendfile(tofd, fromfd, header, offset, count,
|
|---|
| 349 | atomic);
|
|---|
| 350 |
|
|---|
| 351 | /* If the sendfile wasn't atomic, we're done. */
|
|---|
| 352 | if (!atomic) {
|
|---|
| 353 | DEBUG(10, ("non-atomic sendfile read %ul bytes\n", ret));
|
|---|
| 354 | END_PROFILE(syscall_sendfile);
|
|---|
| 355 | return ret;
|
|---|
| 356 | }
|
|---|
| 357 |
|
|---|
| 358 | /*
|
|---|
| 359 | * Atomic sendfile takes care to not write anything to the socket
|
|---|
| 360 | * until all of the requested bytes have been read from the file.
|
|---|
| 361 | * There are two atomic cases that need to be handled.
|
|---|
| 362 | *
|
|---|
| 363 | * 1. The file was truncated causing less data to be read than was
|
|---|
| 364 | * requested. In this case, we return back to the caller to
|
|---|
| 365 | * indicate 0 bytes were written to the socket. This should
|
|---|
| 366 | * prompt the caller to fallback to the standard read path: read
|
|---|
| 367 | * the data, create a header that indicates how many bytes were
|
|---|
| 368 | * actually read, and send the header/data back to the client.
|
|---|
| 369 | *
|
|---|
| 370 | * This saves us from standard sendfile behavior of sending a
|
|---|
| 371 | * header promising more data then will actually be sent. The
|
|---|
| 372 | * only two options are to close the socket and kill the client
|
|---|
| 373 | * connection, or write a bunch of 0s. Closing the client
|
|---|
| 374 | * connection is bad because there could actually be multiple
|
|---|
| 375 | * sessions multiplexed from the same client that are all dropped
|
|---|
| 376 | * because of a truncate. Writing the remaining data as 0s also
|
|---|
| 377 | * isn't good, because the client will have an incorrect version
|
|---|
| 378 | * of the file. If the file is written back to the server, the 0s
|
|---|
| 379 | * will be written back. Fortunately, atomic sendfile allows us
|
|---|
| 380 | * to avoid making this choice in most cases.
|
|---|
| 381 | *
|
|---|
| 382 | * 2. One downside of atomic sendfile, is that there is a limit on
|
|---|
| 383 | * the number of bytes that can be sent atomically. The kernel
|
|---|
| 384 | * has a limited amount of mbuf space that it can read file data
|
|---|
| 385 | * into without exhausting the system's mbufs, so a buffer of
|
|---|
| 386 | * length xfsize is used. The xfsize at the time of writing this
|
|---|
| 387 | * is 64K. xfsize bytes are read from the file, and subsequently
|
|---|
| 388 | * written to the socket. This makes it impossible to do the
|
|---|
| 389 | * sendfile atomically for a byte count > xfsize.
|
|---|
| 390 | *
|
|---|
| 391 | * To cope with large requests, atomic sendfile returns -1 with
|
|---|
| 392 | * errno set to E2BIG. Since windows maxes out at 64K writes,
|
|---|
| 393 | * this is currently only a concern with non-windows clients.
|
|---|
| 394 | * Posix extensions allow the full 24bit bytecount field to be
|
|---|
| 395 | * used in ReadAndX, and clients such as smbclient and the linux
|
|---|
| 396 | * cifs client can request up to 16MB reads! There are a few
|
|---|
| 397 | * options for handling large sendfile requests.
|
|---|
| 398 | *
|
|---|
| 399 | * a. Fall back to the standard read path. This is unacceptable
|
|---|
| 400 | * because it would require prohibitively large mallocs.
|
|---|
| 401 | *
|
|---|
| 402 | * b. Fall back to using samba's fake_send_file which emulates
|
|---|
| 403 | * the kernel sendfile in userspace. This still has the same
|
|---|
| 404 | * problem of sending the header before all of the data has
|
|---|
| 405 | * been read, so it doesn't buy us anything, and has worse
|
|---|
| 406 | * performance than the kernel's zero-copy sendfile.
|
|---|
| 407 | *
|
|---|
| 408 | * c. Use non-atomic sendfile syscall to attempt a zero copy
|
|---|
| 409 | * read, and hope that there isn't a short read due to
|
|---|
| 410 | * truncation. In the case of a short read, there are two
|
|---|
| 411 | * options:
|
|---|
| 412 | *
|
|---|
| 413 | * 1. Kill the client connection
|
|---|
| 414 | *
|
|---|
| 415 | * 2. Write zeros to the socket for the remaining bytes
|
|---|
| 416 | * promised in the header.
|
|---|
| 417 | *
|
|---|
| 418 | * It is safer from a data corruption perspective to kill the
|
|---|
| 419 | * client connection, so this is our default behavior, but if
|
|---|
| 420 | * this causes problems this can be configured to write zeros
|
|---|
| 421 | * via smb.conf.
|
|---|
| 422 | */
|
|---|
| 423 |
|
|---|
| 424 | /* Handle case 1: short read -> truncated file. */
|
|---|
| 425 | if (ret == 0) {
|
|---|
| 426 | END_PROFILE(syscall_sendfile);
|
|---|
| 427 | return ret;
|
|---|
| 428 | }
|
|---|
| 429 |
|
|---|
| 430 | /* Handle case 2: large read. */
|
|---|
| 431 | if (ret == -1 && errno == E2BIG) {
|
|---|
| 432 |
|
|---|
| 433 | if (!lp_parm_bool(SNUM(conn), PARM_ONEFS_TYPE,
|
|---|
| 434 | PARM_SENDFILE_LARGE_READS,
|
|---|
| 435 | PARM_SENDFILE_LARGE_READS_DEFAULT)) {
|
|---|
| 436 | DEBUG(3, ("Not attempting non-atomic large sendfile: "
|
|---|
| 437 | "%lu bytes\n", count));
|
|---|
| 438 | END_PROFILE(syscall_sendfile);
|
|---|
| 439 | return 0;
|
|---|
| 440 | }
|
|---|
| 441 |
|
|---|
| 442 | if (count < 0x10000) {
|
|---|
| 443 | DEBUG(0, ("Count < 2^16 and E2BIG was returned! %lu\n",
|
|---|
| 444 | count));
|
|---|
| 445 | }
|
|---|
| 446 |
|
|---|
| 447 | DEBUG(10, ("attempting non-atomic large sendfile: %lu bytes\n",
|
|---|
| 448 | count));
|
|---|
| 449 |
|
|---|
| 450 | /* Try a non-atomic sendfile. */
|
|---|
| 451 | ret = onefs_sys_do_sendfile(tofd, fromfd, header, offset,
|
|---|
| 452 | count, false);
|
|---|
| 453 | /* Real error: kill the client connection. */
|
|---|
| 454 | if (ret == -1) {
|
|---|
| 455 | DEBUG(1, ("error on non-atomic large sendfile "
|
|---|
| 456 | "(%lu bytes): %s\n", count,
|
|---|
| 457 | strerror(errno)));
|
|---|
| 458 | END_PROFILE(syscall_sendfile);
|
|---|
| 459 | return ret;
|
|---|
| 460 | }
|
|---|
| 461 |
|
|---|
| 462 | /* Short read: kill the client connection. */
|
|---|
| 463 | if (ret != count + header->length) {
|
|---|
| 464 | DEBUG(1, ("short read on non-atomic large sendfile "
|
|---|
| 465 | "(%lu of %lu bytes): %s\n", ret, count,
|
|---|
| 466 | strerror(errno)));
|
|---|
| 467 |
|
|---|
| 468 | /*
|
|---|
| 469 | * Returning ret here would cause us to drop into the
|
|---|
| 470 | * codepath that calls sendfile_short_send, which
|
|---|
| 471 | * sends the client a bunch of zeros instead.
|
|---|
| 472 | * Returning -1 kills the connection.
|
|---|
| 473 | */
|
|---|
| 474 | if (lp_parm_bool(SNUM(conn), PARM_ONEFS_TYPE,
|
|---|
| 475 | PARM_SENDFILE_SAFE,
|
|---|
| 476 | PARM_SENDFILE_SAFE_DEFAULT)) {
|
|---|
| 477 | END_PROFILE(syscall_sendfile);
|
|---|
| 478 | return -1;
|
|---|
| 479 | }
|
|---|
| 480 |
|
|---|
| 481 | END_PROFILE(syscall_sendfile);
|
|---|
| 482 | return ret;
|
|---|
| 483 | }
|
|---|
| 484 |
|
|---|
| 485 | DEBUG(10, ("non-atomic large sendfile successful\n"));
|
|---|
| 486 | }
|
|---|
| 487 |
|
|---|
| 488 | /* There was error in the atomic sendfile. */
|
|---|
| 489 | if (ret == -1) {
|
|---|
| 490 | DEBUG(1, ("error on %s sendfile (%lu bytes): %s\n",
|
|---|
| 491 | atomic ? "atomic" : "non-atomic",
|
|---|
| 492 | count, strerror(errno)));
|
|---|
| 493 | }
|
|---|
| 494 |
|
|---|
| 495 | END_PROFILE(syscall_sendfile);
|
|---|
| 496 | return ret;
|
|---|
| 497 | }
|
|---|
| 498 |
|
|---|
| 499 | /**
|
|---|
| 500 | * Only talloc the spill buffer once (reallocing when necessary).
|
|---|
| 501 | */
|
|---|
| 502 | static char *get_spill_buffer(size_t new_count)
|
|---|
| 503 | {
|
|---|
| 504 | static int cur_count = 0;
|
|---|
| 505 | static char *spill_buffer = NULL;
|
|---|
| 506 |
|
|---|
| 507 | /* If a sufficiently sized buffer exists, just return. */
|
|---|
| 508 | if (new_count <= cur_count) {
|
|---|
| 509 | SMB_ASSERT(spill_buffer);
|
|---|
| 510 | return spill_buffer;
|
|---|
| 511 | }
|
|---|
| 512 |
|
|---|
| 513 | /* Allocate the first time. */
|
|---|
| 514 | if (cur_count == 0) {
|
|---|
| 515 | SMB_ASSERT(!spill_buffer);
|
|---|
| 516 | spill_buffer = talloc_array(NULL, char, new_count);
|
|---|
| 517 | if (spill_buffer) {
|
|---|
| 518 | cur_count = new_count;
|
|---|
| 519 | }
|
|---|
| 520 | return spill_buffer;
|
|---|
| 521 | }
|
|---|
| 522 |
|
|---|
| 523 | /* A buffer exists, but it's not big enough, so realloc. */
|
|---|
| 524 | SMB_ASSERT(spill_buffer);
|
|---|
| 525 | spill_buffer = talloc_realloc(NULL, spill_buffer, char, new_count);
|
|---|
| 526 | if (spill_buffer) {
|
|---|
| 527 | cur_count = new_count;
|
|---|
| 528 | }
|
|---|
| 529 | return spill_buffer;
|
|---|
| 530 | }
|
|---|
| 531 |
|
|---|
| 532 | /**
|
|---|
| 533 | * recvfile does zero-copy writes given an fd to write to, and a socket with
|
|---|
| 534 | * some data to write. If recvfile read more than it was able to write, it
|
|---|
| 535 | * spills the data into a buffer. After first reading any additional data
|
|---|
| 536 | * from the socket into the buffer, the spill buffer is then written with a
|
|---|
| 537 | * standard pwrite.
|
|---|
| 538 | */
|
|---|
| 539 | ssize_t onefs_sys_recvfile(int fromfd, int tofd, SMB_OFF_T offset,
|
|---|
| 540 | size_t count)
|
|---|
| 541 | {
|
|---|
| 542 | char *spill_buffer = NULL;
|
|---|
| 543 | bool socket_drained = false;
|
|---|
| 544 | int ret;
|
|---|
| 545 | off_t total_rbytes = 0;
|
|---|
| 546 | off_t total_wbytes = 0;
|
|---|
| 547 | off_t rbytes;
|
|---|
| 548 | off_t wbytes;
|
|---|
| 549 |
|
|---|
| 550 | START_PROFILE_BYTES(syscall_recvfile, count);
|
|---|
| 551 |
|
|---|
| 552 | DEBUG(10,("onefs_recvfile: from = %d, to = %d, offset=%llu, count = "
|
|---|
| 553 | "%lu\n", fromfd, tofd, offset, count));
|
|---|
| 554 |
|
|---|
| 555 | if (count == 0) {
|
|---|
| 556 | END_PROFILE(syscall_recvfile);
|
|---|
| 557 | return 0;
|
|---|
| 558 | }
|
|---|
| 559 |
|
|---|
| 560 | /*
|
|---|
| 561 | * Setup up a buffer for recvfile to spill data that has been read
|
|---|
| 562 | * from the socket but not written.
|
|---|
| 563 | */
|
|---|
| 564 | spill_buffer = get_spill_buffer(count);
|
|---|
| 565 | if (spill_buffer == NULL) {
|
|---|
| 566 | ret = -1;
|
|---|
| 567 | goto out;
|
|---|
| 568 | }
|
|---|
| 569 |
|
|---|
| 570 | /*
|
|---|
| 571 | * Keep trying recvfile until:
|
|---|
| 572 | * - There is no data left to read on the socket, or
|
|---|
| 573 | * - bytes read != bytes written, or
|
|---|
| 574 | * - An error is returned that isn't EINTR/EAGAIN
|
|---|
| 575 | */
|
|---|
| 576 | do {
|
|---|
| 577 | /* Keep track of bytes read/written for recvfile */
|
|---|
| 578 | rbytes = 0;
|
|---|
| 579 | wbytes = 0;
|
|---|
| 580 |
|
|---|
| 581 | DEBUG(10, ("calling recvfile loop, offset + total_wbytes = "
|
|---|
| 582 | "%llu, count - total_rbytes = %llu\n",
|
|---|
| 583 | offset + total_wbytes, count - total_rbytes));
|
|---|
| 584 |
|
|---|
| 585 | ret = recvfile(tofd, fromfd, offset + total_wbytes,
|
|---|
| 586 | count - total_wbytes, &rbytes, &wbytes, 0,
|
|---|
| 587 | spill_buffer);
|
|---|
| 588 |
|
|---|
| 589 | DEBUG(10, ("recvfile ret = %d, errno = %d, rbytes = %llu, "
|
|---|
| 590 | "wbytes = %llu\n", ret, ret >= 0 ? 0 : errno,
|
|---|
| 591 | rbytes, wbytes));
|
|---|
| 592 |
|
|---|
| 593 | /* Update our progress so far */
|
|---|
| 594 | total_rbytes += rbytes;
|
|---|
| 595 | total_wbytes += wbytes;
|
|---|
| 596 |
|
|---|
| 597 | } while ((count - total_rbytes) && (rbytes == wbytes) &&
|
|---|
| 598 | (ret == -1 && (errno == EINTR || errno == EAGAIN)));
|
|---|
| 599 |
|
|---|
| 600 | DEBUG(10, ("total_rbytes = %llu, total_wbytes = %llu\n",
|
|---|
| 601 | total_rbytes, total_wbytes));
|
|---|
| 602 |
|
|---|
| 603 | /* Log if recvfile didn't write everything it read. */
|
|---|
| 604 | if (total_rbytes != total_wbytes) {
|
|---|
| 605 | DEBUG(3, ("partial recvfile: total_rbytes=%llu but "
|
|---|
| 606 | "total_wbytes=%llu, diff = %llu\n", total_rbytes,
|
|---|
| 607 | total_wbytes, total_rbytes - total_wbytes));
|
|---|
| 608 | SMB_ASSERT(total_rbytes > total_wbytes);
|
|---|
| 609 | }
|
|---|
| 610 |
|
|---|
| 611 | /*
|
|---|
| 612 | * If there is still data on the socket, read it off.
|
|---|
| 613 | */
|
|---|
| 614 | while (total_rbytes < count) {
|
|---|
| 615 |
|
|---|
| 616 | DEBUG(3, ("shallow recvfile (%s), reading %llu\n",
|
|---|
| 617 | strerror(errno), count - total_rbytes));
|
|---|
| 618 |
|
|---|
| 619 | /*
|
|---|
| 620 | * Read the remaining data into the spill buffer. recvfile
|
|---|
| 621 | * may already have some data in the spill buffer, so start
|
|---|
| 622 | * filling the buffer at total_rbytes - total_wbytes.
|
|---|
| 623 | */
|
|---|
| 624 | ret = sys_read(fromfd,
|
|---|
| 625 | spill_buffer + (total_rbytes - total_wbytes),
|
|---|
| 626 | count - total_rbytes);
|
|---|
| 627 |
|
|---|
| 628 | if (ret <= 0) {
|
|---|
| 629 | if (ret == 0) {
|
|---|
| 630 | DEBUG(0, ("shallow recvfile read: EOF\n"));
|
|---|
| 631 | } else {
|
|---|
| 632 | DEBUG(0, ("shallow recvfile read failed: %s\n",
|
|---|
| 633 | strerror(errno)));
|
|---|
| 634 | }
|
|---|
| 635 | /* Socket is dead, so treat as if it were drained. */
|
|---|
| 636 | socket_drained = true;
|
|---|
| 637 | goto out;
|
|---|
| 638 | }
|
|---|
| 639 |
|
|---|
| 640 | /* Data was read so update the rbytes */
|
|---|
| 641 | total_rbytes += ret;
|
|---|
| 642 | }
|
|---|
| 643 |
|
|---|
| 644 | if (total_rbytes != count) {
|
|---|
| 645 | smb_panic("Unread recvfile data still on the socket!");
|
|---|
| 646 | }
|
|---|
| 647 |
|
|---|
| 648 | /*
|
|---|
| 649 | * Now write any spilled data + the extra data read off the socket.
|
|---|
| 650 | */
|
|---|
| 651 | while (total_wbytes < count) {
|
|---|
| 652 |
|
|---|
| 653 | DEBUG(3, ("partial recvfile, writing %llu\n", count - total_wbytes));
|
|---|
| 654 |
|
|---|
| 655 | ret = sys_pwrite(tofd, spill_buffer, count - total_wbytes,
|
|---|
| 656 | offset + total_wbytes);
|
|---|
| 657 |
|
|---|
| 658 | if (ret == -1) {
|
|---|
| 659 | DEBUG(0, ("partial recvfile write failed: %s\n",
|
|---|
| 660 | strerror(errno)));
|
|---|
| 661 | goto out;
|
|---|
| 662 | }
|
|---|
| 663 |
|
|---|
| 664 | /* Data was written so update the wbytes */
|
|---|
| 665 | total_wbytes += ret;
|
|---|
| 666 | }
|
|---|
| 667 |
|
|---|
| 668 | /* Success! */
|
|---|
| 669 | ret = total_wbytes;
|
|---|
| 670 |
|
|---|
| 671 | out:
|
|---|
| 672 |
|
|---|
| 673 | END_PROFILE(syscall_recvfile);
|
|---|
| 674 |
|
|---|
| 675 | /* Make sure we always try to drain the socket. */
|
|---|
| 676 | if (!socket_drained && count - total_rbytes) {
|
|---|
| 677 | int saved_errno = errno;
|
|---|
| 678 |
|
|---|
| 679 | if (drain_socket(fromfd, count - total_rbytes) !=
|
|---|
| 680 | count - total_rbytes) {
|
|---|
| 681 | /* Socket is dead! */
|
|---|
| 682 | DEBUG(0, ("drain socket failed: %d\n", errno));
|
|---|
| 683 | }
|
|---|
| 684 | errno = saved_errno;
|
|---|
| 685 | }
|
|---|
| 686 |
|
|---|
| 687 | return ret;
|
|---|
| 688 | }
|
|---|
| 689 |
|
|---|
| 690 | void init_stat_ex_from_onefs_stat(struct stat_ex *dst, const struct stat *src)
|
|---|
| 691 | {
|
|---|
| 692 | ZERO_STRUCT(*dst);
|
|---|
| 693 |
|
|---|
| 694 | dst->st_ex_dev = src->st_dev;
|
|---|
| 695 | dst->st_ex_ino = src->st_ino;
|
|---|
| 696 | dst->st_ex_mode = src->st_mode;
|
|---|
| 697 | dst->st_ex_nlink = src->st_nlink;
|
|---|
| 698 | dst->st_ex_uid = src->st_uid;
|
|---|
| 699 | dst->st_ex_gid = src->st_gid;
|
|---|
| 700 | dst->st_ex_rdev = src->st_rdev;
|
|---|
| 701 | dst->st_ex_size = src->st_size;
|
|---|
| 702 | dst->st_ex_atime = src->st_atimespec;
|
|---|
| 703 | dst->st_ex_mtime = src->st_mtimespec;
|
|---|
| 704 | dst->st_ex_ctime = src->st_ctimespec;
|
|---|
| 705 | dst->st_ex_btime = src->st_birthtimespec;
|
|---|
| 706 | dst->st_ex_blksize = src->st_blksize;
|
|---|
| 707 | dst->st_ex_blocks = src->st_blocks;
|
|---|
| 708 |
|
|---|
| 709 | dst->st_ex_flags = src->st_flags;
|
|---|
| 710 |
|
|---|
| 711 | dst->vfs_private = src->st_snapid;
|
|---|
| 712 | }
|
|---|
| 713 |
|
|---|
| 714 | int onefs_sys_stat(const char *fname, SMB_STRUCT_STAT *sbuf)
|
|---|
| 715 | {
|
|---|
| 716 | int ret;
|
|---|
| 717 | struct stat onefs_sbuf;
|
|---|
| 718 |
|
|---|
| 719 | ret = stat(fname, &onefs_sbuf);
|
|---|
| 720 |
|
|---|
| 721 | if (ret == 0) {
|
|---|
| 722 | /* we always want directories to appear zero size */
|
|---|
| 723 | if (S_ISDIR(onefs_sbuf.st_mode)) {
|
|---|
| 724 | onefs_sbuf.st_size = 0;
|
|---|
| 725 | }
|
|---|
| 726 | init_stat_ex_from_onefs_stat(sbuf, &onefs_sbuf);
|
|---|
| 727 | }
|
|---|
| 728 | return ret;
|
|---|
| 729 | }
|
|---|
| 730 |
|
|---|
| 731 | int onefs_sys_fstat(int fd, SMB_STRUCT_STAT *sbuf)
|
|---|
| 732 | {
|
|---|
| 733 | int ret;
|
|---|
| 734 | struct stat onefs_sbuf;
|
|---|
| 735 |
|
|---|
| 736 | ret = fstat(fd, &onefs_sbuf);
|
|---|
| 737 |
|
|---|
| 738 | if (ret == 0) {
|
|---|
| 739 | /* we always want directories to appear zero size */
|
|---|
| 740 | if (S_ISDIR(onefs_sbuf.st_mode)) {
|
|---|
| 741 | onefs_sbuf.st_size = 0;
|
|---|
| 742 | }
|
|---|
| 743 | init_stat_ex_from_onefs_stat(sbuf, &onefs_sbuf);
|
|---|
| 744 | }
|
|---|
| 745 | return ret;
|
|---|
| 746 | }
|
|---|
| 747 |
|
|---|
| 748 | int onefs_sys_fstat_at(int base_fd, const char *fname, SMB_STRUCT_STAT *sbuf,
|
|---|
| 749 | int flags)
|
|---|
| 750 | {
|
|---|
| 751 | int ret;
|
|---|
| 752 | struct stat onefs_sbuf;
|
|---|
| 753 |
|
|---|
| 754 | ret = enc_fstatat(base_fd, fname, ENC_DEFAULT, &onefs_sbuf, flags);
|
|---|
| 755 |
|
|---|
| 756 | if (ret == 0) {
|
|---|
| 757 | /* we always want directories to appear zero size */
|
|---|
| 758 | if (S_ISDIR(onefs_sbuf.st_mode)) {
|
|---|
| 759 | onefs_sbuf.st_size = 0;
|
|---|
| 760 | }
|
|---|
| 761 | init_stat_ex_from_onefs_stat(sbuf, &onefs_sbuf);
|
|---|
| 762 | }
|
|---|
| 763 | return ret;
|
|---|
| 764 | }
|
|---|
| 765 |
|
|---|
| 766 | int onefs_sys_lstat(const char *fname, SMB_STRUCT_STAT *sbuf)
|
|---|
| 767 | {
|
|---|
| 768 | int ret;
|
|---|
| 769 | struct stat onefs_sbuf;
|
|---|
| 770 |
|
|---|
| 771 | ret = lstat(fname, &onefs_sbuf);
|
|---|
| 772 |
|
|---|
| 773 | if (ret == 0) {
|
|---|
| 774 | /* we always want directories to appear zero size */
|
|---|
| 775 | if (S_ISDIR(onefs_sbuf.st_mode)) {
|
|---|
| 776 | onefs_sbuf.st_size = 0;
|
|---|
| 777 | }
|
|---|
| 778 | init_stat_ex_from_onefs_stat(sbuf, &onefs_sbuf);
|
|---|
| 779 | }
|
|---|
| 780 | return ret;
|
|---|
| 781 | }
|
|---|
| 782 |
|
|---|