]> git.proxmox.com Git - mirror_zfs.git/commitdiff
zfs_ioc_send: use a dedicated taskq thread for send
authorRob Norris <rob.norris@klarasystems.com>
Thu, 2 May 2024 01:57:23 +0000 (11:57 +1000)
committerBrian Behlendorf <behlendorf1@llnl.gov>
Tue, 14 May 2024 16:39:26 +0000 (09:39 -0700)
When stack space is tight, the stream is written to its target on a
separate taskq thread to make sure there's enough stack space to
complete it.

This has always used an IO taskq, but that doesn't really make sense for
it, and moving it onto a regular taskq lets us get rid of
spa_taskq_dispatch_sync(), which is not used anywhere else.

Stream writes may block for a long time depending on what the target is,
and we have no way of discovering this, so we can't risk using the
system taskq, as there may be many tens of sends in progress. Instead,
we create a dedicated taskq thread for each send writer to run on, and
clean it up when it's done.

Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Closes #16151

module/zfs/zfs_ioctl.c

index 908b9efc18130d04b807b9766b728a22dcddef48..b720b4f222b16b1ea4c26a0d89dc65d533c8a5ec 100644 (file)
@@ -38,7 +38,7 @@
  * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
  * Copyright (c) 2019 Datto Inc.
  * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
- * Copyright (c) 2019, 2021, Klara Inc.
+ * Copyright (c) 2019, 2021, 2024, Klara Inc.
  * Copyright (c) 2019, Allan Jude
  * Copyright 2024 Oxide Computer Company
  */
@@ -5514,6 +5514,14 @@ out:
        return (error);
 }
 
+/*
+ * When stack space is limited, we write replication stream data to the target
+ * on a separate taskq thread, to make sure there's enough stack space.
+ */
+#ifndef HAVE_LARGE_STACKS
+#define        USE_SEND_TASKQ  1
+#endif
+
 typedef struct dump_bytes_io {
        zfs_file_t      *dbi_fp;
        caddr_t         dbi_buf;
@@ -5534,31 +5542,65 @@ dump_bytes_cb(void *arg)
        dbi->dbi_err = zfs_file_write(fp, buf, dbi->dbi_len, NULL);
 }
 
+typedef struct dump_bytes_arg {
+       zfs_file_t      *dba_fp;
+#ifdef USE_SEND_TASKQ
+       taskq_t         *dba_tq;
+       taskq_ent_t     dba_tqent;
+#endif
+} dump_bytes_arg_t;
+
 static int
 dump_bytes(objset_t *os, void *buf, int len, void *arg)
 {
+       dump_bytes_arg_t *dba = (dump_bytes_arg_t *)arg;
        dump_bytes_io_t dbi;
 
-       dbi.dbi_fp = arg;
+       dbi.dbi_fp = dba->dba_fp;
        dbi.dbi_buf = buf;
        dbi.dbi_len = len;
 
-#if defined(HAVE_LARGE_STACKS)
-       dump_bytes_cb(&dbi);
+#ifdef USE_SEND_TASKQ
+       taskq_dispatch_ent(dba->dba_tq, dump_bytes_cb, &dbi, TQ_SLEEP,
+           &dba->dba_tqent);
+       taskq_wait(dba->dba_tq);
 #else
-       /*
-        * The vn_rdwr() call is performed in a taskq to ensure that there is
-        * always enough stack space to write safely to the target filesystem.
-        * The ZIO_TYPE_FREE threads are used because there can be a lot of
-        * them and they are used in vdev_file.c for a similar purpose.
-        */
-       spa_taskq_dispatch_sync(dmu_objset_spa(os), ZIO_TYPE_FREE,
-           ZIO_TASKQ_ISSUE, dump_bytes_cb, &dbi, TQ_SLEEP);
-#endif /* HAVE_LARGE_STACKS */
+       dump_bytes_cb(&dbi);
+#endif
 
        return (dbi.dbi_err);
 }
 
+static int
+dump_bytes_init(dump_bytes_arg_t *dba, int fd, dmu_send_outparams_t *out)
+{
+       zfs_file_t *fp = zfs_file_get(fd);
+       if (fp == NULL)
+               return (SET_ERROR(EBADF));
+
+       dba->dba_fp = fp;
+#ifdef USE_SEND_TASKQ
+       dba->dba_tq = taskq_create("z_send", 1, defclsyspri, 0, 0, 0);
+       taskq_init_ent(&dba->dba_tqent);
+#endif
+
+       memset(out, 0, sizeof (dmu_send_outparams_t));
+       out->dso_outfunc = dump_bytes;
+       out->dso_arg = dba;
+       out->dso_dryrun = B_FALSE;
+
+       return (0);
+}
+
+static void
+dump_bytes_fini(dump_bytes_arg_t *dba)
+{
+       zfs_file_put(dba->dba_fp);
+#ifdef USE_SEND_TASKQ
+       taskq_destroy(dba->dba_tq);
+#endif
+}
+
 /*
  * inputs:
  * zc_name     name of snapshot to send
@@ -5643,21 +5685,18 @@ zfs_ioc_send(zfs_cmd_t *zc)
                dsl_dataset_rele(tosnap, FTAG);
                dsl_pool_rele(dp, FTAG);
        } else {
-               zfs_file_t *fp;
-               dmu_send_outparams_t out = {0};
-
-               if ((fp = zfs_file_get(zc->zc_cookie)) == NULL)
-                       return (SET_ERROR(EBADF));
+               dump_bytes_arg_t dba;
+               dmu_send_outparams_t out;
+               error = dump_bytes_init(&dba, zc->zc_cookie, &out);
+               if (error)
+                       return (error);
 
-               off = zfs_file_off(fp);
-               out.dso_outfunc = dump_bytes;
-               out.dso_arg = fp;
-               out.dso_dryrun = B_FALSE;
+               off = zfs_file_off(dba.dba_fp);
                error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
                    zc->zc_fromobj, embedok, large_block_ok, compressok,
                    rawok, savedok, zc->zc_cookie, &off, &out);
 
-               zfs_file_put(fp);
+               dump_bytes_fini(&dba);
        }
        return (error);
 }
@@ -6604,7 +6643,6 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
        offset_t off;
        const char *fromname = NULL;
        int fd;
-       zfs_file_t *fp;
        boolean_t largeblockok;
        boolean_t embedok;
        boolean_t compressok;
@@ -6629,20 +6667,19 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
 
        (void) nvlist_lookup_string(innvl, "redactbook", &redactbook);
 
-       if ((fp = zfs_file_get(fd)) == NULL)
-               return (SET_ERROR(EBADF));
-
-       off = zfs_file_off(fp);
+       dump_bytes_arg_t dba;
+       dmu_send_outparams_t out;
+       error = dump_bytes_init(&dba, fd, &out);
+       if (error)
+               return (error);
 
-       dmu_send_outparams_t out = {0};
-       out.dso_outfunc = dump_bytes;
-       out.dso_arg = fp;
-       out.dso_dryrun = B_FALSE;
+       off = zfs_file_off(dba.dba_fp);
        error = dmu_send(snapname, fromname, embedok, largeblockok,
            compressok, rawok, savedok, resumeobj, resumeoff,
            redactbook, fd, &off, &out);
 
-       zfs_file_put(fp);
+       dump_bytes_fini(&dba);
+
        return (error);
 }