]> git.proxmox.com Git - mirror_zfs.git/commitdiff
Parallel pool import
author George Wilson <george.wilson@delphix.com>
Mon, 22 Apr 2024 16:42:38 +0000 (12:42 -0400)
committer GitHub <noreply@github.com>
Mon, 22 Apr 2024 16:42:38 +0000 (09:42 -0700)
This commit allows spa_load() to drop the spa_namespace_lock so
that imports can happen concurrently. Prior to dropping the
spa_namespace_lock, the import logic will set the spa_load_thread
value to track the thread which is doing the import.

Consumers of spa_lookup() retain the same behavior by blocking
when either a thread is holding the spa_namespace_lock or the
spa_load_thread value is set. This will ensure that critical
concurrent operations cannot take place while a pool is being
imported.

The zpool command is also enhanced to provide multi-threaded support
when invoking zpool import -a.

Lastly, zinject provides a mechanism to insert artificial delays
when importing a pool and new zfs tests are added to verify parallel
import functionality.

Contributions-by: Don Brady <don.brady@klarasystems.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: George Wilson <gwilson@delphix.com>
Closes #16093

19 files changed:
cmd/zinject/zinject.c
cmd/zpool/zpool_main.c
include/libzutil.h
include/sys/spa.h
include/sys/spa_impl.h
include/sys/zfs_ioctl.h
include/sys/zio.h
man/man8/zinject.8
module/zfs/spa.c
module/zfs/spa_misc.c
module/zfs/vdev_initialize.c
module/zfs/vdev_rebuild.c
module/zfs/vdev_trim.c
module/zfs/zio_inject.c
tests/runfiles/common.run
tests/zfs-tests/tests/Makefile.am
tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_parallel_admin.ksh [new file with mode: 0755]
tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_parallel_neg.ksh [new file with mode: 0755]
tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_parallel_pos.ksh [new file with mode: 0755]

index e9141fb4ba5571961074f5c548625e0aca626213..ed60cce3dd164ea3c080954ac864f9b127e70df4 100644 (file)
@@ -22,7 +22,7 @@
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
  * Copyright (c) 2017, Intel Corporation.
- * Copyright (c) 2024, Klara Inc.
+ * Copyright (c) 2023-2024, Klara Inc.
  */
 
 /*
@@ -310,6 +310,11 @@ usage(void)
            "\t\tcreate 3 lanes on the device; one lane with a latency\n"
            "\t\tof 10 ms and two lanes with a 25 ms latency.\n"
            "\n"
+           "\tzinject -P import|export -s <seconds> pool\n"
+           "\t\tAdd an artificial delay to a future pool import or export,\n"
+           "\t\tsuch that the operation takes a minimum of supplied seconds\n"
+           "\t\tto complete.\n"
+           "\n"
            "\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
            "\t\tCause the pool to stop writing blocks yet not\n"
            "\t\treport errors for a duration.  Simulates buggy hardware\n"
@@ -392,8 +397,10 @@ print_data_handler(int id, const char *pool, zinject_record_t *record,
 {
        int *count = data;
 
-       if (record->zi_guid != 0 || record->zi_func[0] != '\0')
+       if (record->zi_guid != 0 || record->zi_func[0] != '\0' ||
+           record->zi_duration != 0) {
                return (0);
+       }
 
        if (*count == 0) {
                (void) printf("%3s  %-15s  %-6s  %-6s  %-8s  %3s  %-4s  "
@@ -507,6 +514,33 @@ print_panic_handler(int id, const char *pool, zinject_record_t *record,
        return (0);
 }
 
+static int
+print_pool_delay_handler(int id, const char *pool, zinject_record_t *record,
+    void *data)
+{
+       int *count = data;
+
+       if (record->zi_cmd != ZINJECT_DELAY_IMPORT &&
+           record->zi_cmd != ZINJECT_DELAY_EXPORT) {
+               return (0);
+       }
+
+       if (*count == 0) {
+               (void) printf("%3s  %-19s  %-11s  %s\n",
+                   "ID", "POOL", "DELAY (sec)", "COMMAND");
+               (void) printf("---  -------------------  -----------"
+                   "  -------\n");
+       }
+
+       *count += 1;
+
+       (void) printf("%3d  %-19s  %-11llu  %s\n",
+           id, pool, (u_longlong_t)record->zi_duration,
+           record->zi_cmd == ZINJECT_DELAY_IMPORT ? "import": "export");
+
+       return (0);
+}
+
 /*
  * Print all registered error handlers.  Returns the number of handlers
  * registered.
@@ -537,6 +571,13 @@ print_all_handlers(void)
                count = 0;
        }
 
+       (void) iter_handlers(print_pool_delay_handler, &count);
+       if (count > 0) {
+               total += count;
+               (void) printf("\n");
+               count = 0;
+       }
+
        (void) iter_handlers(print_panic_handler, &count);
 
        return (count + total);
@@ -609,9 +650,27 @@ register_handler(const char *pool, int flags, zinject_record_t *record,
        zc.zc_guid = flags;
 
        if (zfs_ioctl(g_zfs, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
-               (void) fprintf(stderr, "failed to add handler: %s\n",
-                   errno == EDOM ? "block level exceeds max level of object" :
-                   strerror(errno));
+               const char *errmsg = strerror(errno);
+
+               switch (errno) {
+               case EDOM:
+                       errmsg = "block level exceeds max level of object";
+                       break;
+               case EEXIST:
+                       if (record->zi_cmd == ZINJECT_DELAY_IMPORT)
+                               errmsg = "pool already imported";
+                       if (record->zi_cmd == ZINJECT_DELAY_EXPORT)
+                               errmsg = "a handler already exists";
+                       break;
+               case ENOENT:
+                       /* import delay injector running on older zfs module */
+                       if (record->zi_cmd == ZINJECT_DELAY_IMPORT)
+                               errmsg = "import delay injector not supported";
+                       break;
+               default:
+                       break;
+               }
+               (void) fprintf(stderr, "failed to add handler: %s\n", errmsg);
                return (1);
        }
 
@@ -636,6 +695,9 @@ register_handler(const char *pool, int flags, zinject_record_t *record,
                } else if (record->zi_duration < 0) {
                        (void) printf(" txgs: %lld \n",
                            (u_longlong_t)-record->zi_duration);
+               } else if (record->zi_timer > 0) {
+                       (void) printf(" timer: %lld ms\n",
+                           (u_longlong_t)NSEC2MSEC(record->zi_timer));
                } else {
                        (void) printf("objset: %llu\n",
                            (u_longlong_t)record->zi_objset);
@@ -834,7 +896,7 @@ main(int argc, char **argv)
        }
 
        while ((c = getopt(argc, argv,
-           ":aA:b:C:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:")) != -1) {
+           ":aA:b:C:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:P:")) != -1) {
                switch (c) {
                case 'a':
                        flags |= ZINJECT_FLUSH_ARC;
@@ -952,6 +1014,19 @@ main(int argc, char **argv)
                            sizeof (record.zi_func));
                        record.zi_cmd = ZINJECT_PANIC;
                        break;
+               case 'P':
+                       if (strcasecmp(optarg, "import") == 0) {
+                               record.zi_cmd = ZINJECT_DELAY_IMPORT;
+                       } else if (strcasecmp(optarg, "export") == 0) {
+                               record.zi_cmd = ZINJECT_DELAY_EXPORT;
+                       } else {
+                               (void) fprintf(stderr, "invalid command '%s': "
+                                   "must be 'import' or 'export'\n", optarg);
+                               usage();
+                               libzfs_fini(g_zfs);
+                               return (1);
+                       }
+                       break;
                case 'q':
                        quiet = 1;
                        break;
@@ -1033,7 +1108,7 @@ main(int argc, char **argv)
        argc -= optind;
        argv += optind;
 
-       if (record.zi_duration != 0)
+       if (record.zi_duration != 0 && record.zi_cmd == 0)
                record.zi_cmd = ZINJECT_IGNORED_WRITES;
 
        if (cancel != NULL) {
@@ -1179,8 +1254,8 @@ main(int argc, char **argv)
                if (raw != NULL || range != NULL || type != TYPE_INVAL ||
                    level != 0 || device != NULL || record.zi_freq > 0 ||
                    dvas != 0) {
-                       (void) fprintf(stderr, "panic (-p) incompatible with "
-                           "other options\n");
+                       (void) fprintf(stderr, "%s incompatible with other "
+                           "options\n", "import|export delay (-P)");
                        usage();
                        libzfs_fini(g_zfs);
                        return (2);
@@ -1198,6 +1273,28 @@ main(int argc, char **argv)
                if (argv[1] != NULL)
                        record.zi_type = atoi(argv[1]);
                dataset[0] = '\0';
+       } else if (record.zi_cmd == ZINJECT_DELAY_IMPORT ||
+           record.zi_cmd == ZINJECT_DELAY_EXPORT) {
+               if (raw != NULL || range != NULL || type != TYPE_INVAL ||
+                   level != 0 || device != NULL || record.zi_freq > 0 ||
+                   dvas != 0) {
+                       (void) fprintf(stderr, "%s incompatible with other "
+                           "options\n", "import|export delay (-P)");
+                       usage();
+                       libzfs_fini(g_zfs);
+                       return (2);
+               }
+
+               if (argc != 1 || record.zi_duration <= 0) {
+                       (void) fprintf(stderr, "import|export delay (-P) "
+                           "injection requires a duration (-s) and a single "
+                           "pool name\n");
+                       usage();
+                       libzfs_fini(g_zfs);
+                       return (2);
+               }
+
+               (void) strlcpy(pool, argv[0], sizeof (pool));
        } else if (record.zi_cmd == ZINJECT_IGNORED_WRITES) {
                if (raw != NULL || range != NULL || type != TYPE_INVAL ||
                    level != 0 || record.zi_freq > 0 || dvas != 0) {
index d670cd1afeb1bf53de1e83fe3b0ea4ff712c4d96..e6664b918be483314628b63046697adf4e047037 100644 (file)
@@ -50,6 +50,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <thread_pool.h>
 #include <time.h>
 #include <unistd.h>
 #include <pwd.h>
@@ -3455,15 +3456,40 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
        return (ret);
 }
 
+typedef struct import_parameters {
+       nvlist_t *ip_config;
+       const char *ip_mntopts;
+       nvlist_t *ip_props;
+       int ip_flags;
+       int *ip_err;
+} import_parameters_t;
+
+static void
+do_import_task(void *arg)
+{
+       import_parameters_t *ip = arg;
+       *ip->ip_err |= do_import(ip->ip_config, NULL, ip->ip_mntopts,
+           ip->ip_props, ip->ip_flags);
+       free(ip);
+}
+
+
 static int
 import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,
-    char *orig_name, char *new_name,
-    boolean_t do_destroyed, boolean_t pool_specified, boolean_t do_all,
-    importargs_t *import)
+    char *orig_name, char *new_name, importargs_t *import)
 {
        nvlist_t *config = NULL;
        nvlist_t *found_config = NULL;
        uint64_t pool_state;
+       boolean_t pool_specified = (import->poolname != NULL ||
+           import->guid != 0);
+
+
+       tpool_t *tp = NULL;
+       if (import->do_all) {
+               tp = tpool_create(1, 5 * sysconf(_SC_NPROCESSORS_ONLN),
+                   0, NULL);
+       }
 
        /*
         * At this point we have a list of import candidate configs. Even if
@@ -3480,9 +3506,11 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,
 
                verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
                    &pool_state) == 0);
-               if (!do_destroyed && pool_state == POOL_STATE_DESTROYED)
+               if (!import->do_destroyed &&
+                   pool_state == POOL_STATE_DESTROYED)
                        continue;
-               if (do_destroyed && pool_state != POOL_STATE_DESTROYED)
+               if (import->do_destroyed &&
+                   pool_state != POOL_STATE_DESTROYED)
                        continue;
 
                verify(nvlist_add_nvlist(config, ZPOOL_LOAD_POLICY,
@@ -3491,12 +3519,21 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,
                if (!pool_specified) {
                        if (first)
                                first = B_FALSE;
-                       else if (!do_all)
+                       else if (!import->do_all)
                                (void) fputc('\n', stdout);
 
-                       if (do_all) {
-                               err |= do_import(config, NULL, mntopts,
-                                   props, flags);
+                       if (import->do_all) {
+                               import_parameters_t *ip = safe_malloc(
+                                   sizeof (import_parameters_t));
+
+                               ip->ip_config = config;
+                               ip->ip_mntopts = mntopts;
+                               ip->ip_props = props;
+                               ip->ip_flags = flags;
+                               ip->ip_err = &err;
+
+                               (void) tpool_dispatch(tp, do_import_task,
+                                   (void *)ip);
                        } else {
                                /*
                                 * If we're importing from cachefile, then
@@ -3544,6 +3581,10 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,
                                found_config = config;
                }
        }
+       if (import->do_all) {
+               tpool_wait(tp);
+               tpool_destroy(tp);
+       }
 
        /*
         * If we were searching for a specific pool, verify that we found a
@@ -3773,7 +3814,6 @@ zpool_do_import(int argc, char **argv)
        boolean_t xtreme_rewind = B_FALSE;
        boolean_t do_scan = B_FALSE;
        boolean_t pool_exists = B_FALSE;
-       boolean_t pool_specified = B_FALSE;
        uint64_t txg = -1ULL;
        char *cachefile = NULL;
        importargs_t idata = { 0 };
@@ -3972,7 +4012,6 @@ zpool_do_import(int argc, char **argv)
                        searchname = argv[0];
                        searchguid = 0;
                }
-               pool_specified = B_TRUE;
 
                /*
                 * User specified a name or guid.  Ensure it's unique.
@@ -4005,6 +4044,8 @@ zpool_do_import(int argc, char **argv)
        idata.cachefile = cachefile;
        idata.scan = do_scan;
        idata.policy = policy;
+       idata.do_destroyed = do_destroyed;
+       idata.do_all = do_all;
 
        libpc_handle_t lpch = {
                .lpc_lib_handle = g_zfs,
@@ -4047,9 +4088,7 @@ zpool_do_import(int argc, char **argv)
        }
 
        err = import_pools(pools, props, mntopts, flags,
-           argc >= 1 ? argv[0] : NULL,
-           argc >= 2 ? argv[1] : NULL,
-           do_destroyed, pool_specified, do_all, &idata);
+           argc >= 1 ? argv[0] : NULL, argc >= 2 ? argv[1] : NULL, &idata);
 
        /*
         * If we're using the cachefile and we failed to import, then
@@ -4070,9 +4109,8 @@ zpool_do_import(int argc, char **argv)
                pools = zpool_search_import(&lpch, &idata);
 
                err = import_pools(pools, props, mntopts, flags,
-                   argc >= 1 ? argv[0] : NULL,
-                   argc >= 2 ? argv[1] : NULL,
-                   do_destroyed, pool_specified, do_all, &idata);
+                   argc >= 1 ? argv[0] : NULL, argc >= 2 ? argv[1] : NULL,
+                   &idata);
        }
 
 error:
index d9a9a65753dde5495bbb0a5f5a57d76c04841d9f..e2108ceeaa442fa00192ed8558ae69e786b603ff 100644 (file)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2018, 2024 by Delphix. All rights reserved.
  */
 
 #ifndef        _LIBZUTIL_H
@@ -79,6 +79,8 @@ typedef struct importargs {
        boolean_t can_be_active; /* can the pool be active?             */
        boolean_t scan;         /* prefer scanning to libblkid cache    */
        nvlist_t *policy;       /* load policy (max txg, rewind, etc.)  */
+       boolean_t do_destroyed;
+       boolean_t do_all;
 } importargs_t;
 
 typedef struct libpc_handle {
index b969f05afe48748a235b1ad30c07452312193d35..ca15025ba33c521cf080aa526884d11893f7cf9f 100644 (file)
@@ -833,6 +833,8 @@ void spa_select_allocator(zio_t *zio);
 
 /* spa namespace global mutex */
 extern kmutex_t spa_namespace_lock;
+extern avl_tree_t spa_namespace_avl;
+extern kcondvar_t spa_namespace_cv;
 
 /*
  * SPA configuration functions in spa_config.c
index 0cd0c4720fbefa00277c9815a17367e992997740..d7da085ab3131c8f586e5e266b6c1166293602cf 100644 (file)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2024 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  * Copyright 2013 Saso Kiselkov. All rights reserved.
@@ -237,6 +237,7 @@ struct spa {
        dsl_pool_t      *spa_dsl_pool;
        boolean_t       spa_is_initializing;    /* true while opening pool */
        boolean_t       spa_is_exporting;       /* true while exporting pool */
+       kthread_t       *spa_load_thread;       /* loading, no namespace lock */
        metaslab_class_t *spa_normal_class;     /* normal data class */
        metaslab_class_t *spa_log_class;        /* intent log data class */
        metaslab_class_t *spa_embedded_log_class; /* log on normal vdevs */
index 26dfe97604def0922378910bee4c35671b438fb2..525d40759fddce4a0fc53f5e54f846f58fa53483 100644 (file)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2024 by Delphix. All rights reserved.
  * Copyright 2016 RackTop Systems.
  * Copyright (c) 2017, Intel Corporation.
  */
@@ -454,6 +454,8 @@ typedef enum zinject_type {
        ZINJECT_PANIC,
        ZINJECT_DELAY_IO,
        ZINJECT_DECRYPT_FAULT,
+       ZINJECT_DELAY_IMPORT,
+       ZINJECT_DELAY_EXPORT,
 } zinject_type_t;
 
 typedef struct zfs_share {
index 545b9cf0c3c555c42384ad706ee5f4f117247a82..4037b429982bbc22af952ecc0823d4847e24cc73 100644 (file)
@@ -22,7 +22,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2024 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  * Copyright 2016 Toomas Soome <tsoome@me.com>
@@ -686,6 +686,8 @@ extern int zio_handle_device_injections(vdev_t *vd, zio_t *zio, int err1,
 extern int zio_handle_label_injection(zio_t *zio, int error);
 extern void zio_handle_ignored_writes(zio_t *zio);
 extern hrtime_t zio_handle_io_delay(zio_t *zio);
+extern void zio_handle_import_delay(spa_t *spa, hrtime_t elapsed);
+extern void zio_handle_export_delay(spa_t *spa, hrtime_t elapsed);
 
 /*
  * Checksum ereport functions
index f67b5e378dc329047da13f1e2c79cd5423937e5f..ad9e7a42bfac4fa2c53fe4b019222484591e4d9e 100644 (file)
@@ -129,6 +129,14 @@ Force a vdev error.
 .
 .It Xo
 .Nm zinject
+.Fl P Cm import Ns | Ns Cm export Fl s Ar seconds
+.Ar pool
+.Xc
+Add an artificial delay to a future import or export of a pool.
+This injector is automatically cleared after the import is finished.
+.
+.It Xo
+.Nm zinject
 .Fl I
 .Op Fl s Ar seconds Ns | Ns Fl g Ar txgs
 .Ar pool
index f67d980ae4c63d8a6c3dfc91a4d6f23242773097..96daf51b696a53aac23272dbef16326113cbd670 100644 (file)
@@ -3273,8 +3273,6 @@ spa_spawn_aux_threads(spa_t *spa)
 {
        ASSERT(spa_writeable(spa));
 
-       ASSERT(MUTEX_HELD(&spa_namespace_lock));
-
        spa_start_raidz_expansion_thread(spa);
        spa_start_indirect_condensing_thread(spa);
        spa_start_livelist_destroy_thread(spa);
@@ -4981,7 +4979,8 @@ spa_ld_read_checkpoint_txg(spa_t *spa)
        int error = 0;
 
        ASSERT0(spa->spa_checkpoint_txg);
-       ASSERT(MUTEX_HELD(&spa_namespace_lock));
+       ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
+           spa->spa_load_thread == curthread);
 
        error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
            DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t),
@@ -5228,6 +5227,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
        boolean_t checkpoint_rewind =
            (spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT);
        boolean_t update_config_cache = B_FALSE;
+       hrtime_t load_start = gethrtime();
 
        ASSERT(MUTEX_HELD(&spa_namespace_lock));
        ASSERT(spa->spa_config_source != SPA_CONFIG_SRC_NONE);
@@ -5272,13 +5272,19 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
                        return (error);
        }
 
+       /*
+        * Drop the namespace lock for the rest of the function.
+        */
+       spa->spa_load_thread = curthread;
+       mutex_exit(&spa_namespace_lock);
+
        /*
         * Retrieve the checkpoint txg if the pool has a checkpoint.
         */
        spa_import_progress_set_notes(spa, "Loading checkpoint txg");
        error = spa_ld_read_checkpoint_txg(spa);
        if (error != 0)
-               return (error);
+               goto fail;
 
        /*
         * Retrieve the mapping of indirect vdevs. Those vdevs were removed
@@ -5291,7 +5297,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
        spa_import_progress_set_notes(spa, "Loading indirect vdev metadata");
        error = spa_ld_open_indirect_vdev_metadata(spa);
        if (error != 0)
-               return (error);
+               goto fail;
 
        /*
         * Retrieve the full list of active features from the MOS and check if
@@ -5300,7 +5306,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
        spa_import_progress_set_notes(spa, "Checking feature flags");
        error = spa_ld_check_features(spa, &missing_feat_write);
        if (error != 0)
-               return (error);
+               goto fail;
 
        /*
         * Load several special directories from the MOS needed by the dsl_pool
@@ -5309,7 +5315,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
        spa_import_progress_set_notes(spa, "Loading special MOS directories");
        error = spa_ld_load_special_directories(spa);
        if (error != 0)
-               return (error);
+               goto fail;
 
        /*
         * Retrieve pool properties from the MOS.
@@ -5317,7 +5323,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
        spa_import_progress_set_notes(spa, "Loading properties");
        error = spa_ld_get_props(spa);
        if (error != 0)
-               return (error);
+               goto fail;
 
        /*
         * Retrieve the list of auxiliary devices - cache devices and spares -
@@ -5326,7 +5332,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
        spa_import_progress_set_notes(spa, "Loading AUX vdevs");
        error = spa_ld_open_aux_vdevs(spa, type);
        if (error != 0)
-               return (error);
+               goto fail;
 
        /*
         * Load the metadata for all vdevs. Also check if unopenable devices
@@ -5335,17 +5341,17 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
        spa_import_progress_set_notes(spa, "Loading vdev metadata");
        error = spa_ld_load_vdev_metadata(spa);
        if (error != 0)
-               return (error);
+               goto fail;
 
        spa_import_progress_set_notes(spa, "Loading dedup tables");
        error = spa_ld_load_dedup_tables(spa);
        if (error != 0)
-               return (error);
+               goto fail;
 
        spa_import_progress_set_notes(spa, "Loading BRT");
        error = spa_ld_load_brt(spa);
        if (error != 0)
-               return (error);
+               goto fail;
 
        /*
         * Verify the logs now to make sure we don't have any unexpected errors
@@ -5354,7 +5360,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
        spa_import_progress_set_notes(spa, "Verifying Log Devices");
        error = spa_ld_verify_logs(spa, type, ereport);
        if (error != 0)
-               return (error);
+               goto fail;
 
        if (missing_feat_write) {
                ASSERT(spa->spa_load_state == SPA_LOAD_TRYIMPORT);
@@ -5364,8 +5370,9 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
                 * read-only mode but not read-write mode. We now have enough
                 * information and can return to userland.
                 */
-               return (spa_vdev_err(spa->spa_root_vdev, VDEV_AUX_UNSUP_FEAT,
-                   ENOTSUP));
+               error = spa_vdev_err(spa->spa_root_vdev, VDEV_AUX_UNSUP_FEAT,
+                   ENOTSUP);
+               goto fail;
        }
 
        /*
@@ -5376,7 +5383,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
        spa_import_progress_set_notes(spa, "Verifying pool data");
        error = spa_ld_verify_pool_data(spa);
        if (error != 0)
-               return (error);
+               goto fail;
 
        /*
         * Calculate the deflated space for the pool. This must be done before
@@ -5501,13 +5508,19 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
                spa_config_exit(spa, SCL_CONFIG, FTAG);
                spa_import_progress_set_notes(spa, "Finished importing");
        }
+       zio_handle_import_delay(spa, gethrtime() - load_start);
 
        spa_import_progress_remove(spa_guid(spa));
        spa_async_request(spa, SPA_ASYNC_L2CACHE_REBUILD);
 
        spa_load_note(spa, "LOADED");
+fail:
+       mutex_enter(&spa_namespace_lock);
+       spa->spa_load_thread = NULL;
+       cv_broadcast(&spa_namespace_cv);
+
+       return (error);
 
-       return (0);
 }
 
 static int
@@ -6757,9 +6770,14 @@ spa_tryimport(nvlist_t *tryconfig)
        /*
         * Create and initialize the spa structure.
         */
+       char *name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+       (void) snprintf(name, MAXPATHLEN, "%s-%llx-%s",
+           TRYIMPORT_NAME, (u_longlong_t)curthread, poolname);
+
        mutex_enter(&spa_namespace_lock);
-       spa = spa_add(TRYIMPORT_NAME, tryconfig, NULL);
+       spa = spa_add(name, tryconfig, NULL);
        spa_activate(spa, SPA_MODE_READ);
+       kmem_free(name, MAXPATHLEN);
 
        /*
         * Rewind pool if a max txg was provided.
@@ -6874,6 +6892,7 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
 {
        int error;
        spa_t *spa;
+       hrtime_t export_start = gethrtime();
 
        if (oldconfig)
                *oldconfig = NULL;
@@ -7018,6 +7037,9 @@ export_spa:
                spa->spa_is_exporting = B_FALSE;
        }
 
+       if (new_state == POOL_STATE_EXPORTED)
+               zio_handle_export_delay(spa, gethrtime() - export_start);
+
        mutex_exit(&spa_namespace_lock);
        return (0);
 
index 68b907614196b6a6ca69815342a9ca3eba370eeb..5fb7847b5d8b5ef94c780ab29cb936356aa13b38 100644 (file)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2024 by Delphix. All rights reserved.
  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  * Copyright 2013 Saso Kiselkov. All rights reserved.
@@ -82,7 +82,8 @@
  *             - Check if spa_refcount is zero
  *             - Rename a spa_t
  *             - add/remove/attach/detach devices
- *             - Held for the duration of create/destroy/import/export
+ *             - Held for the duration of create/destroy/export
+ *             - Held at the start and end of import
  *
  *     It does not need to handle recursion.  A create or destroy may
  *     reference objects (files or zvols) in other pools, but by
  * locking is, always, based on spa_namespace_lock and spa_config_lock[].
  */
 
-static avl_tree_t spa_namespace_avl;
+avl_tree_t spa_namespace_avl;
 kmutex_t spa_namespace_lock;
-static kcondvar_t spa_namespace_cv;
+kcondvar_t spa_namespace_cv;
 static const int spa_max_replication_override = SPA_DVAS_PER_BP;
 
 static kmutex_t spa_spare_lock;
@@ -619,6 +620,7 @@ spa_lookup(const char *name)
 
        ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
+retry:
        (void) strlcpy(search.spa_name, name, sizeof (search.spa_name));
 
        /*
@@ -630,6 +632,14 @@ spa_lookup(const char *name)
                *cp = '\0';
 
        spa = avl_find(&spa_namespace_avl, &search, &where);
+       if (spa == NULL)
+               return (NULL);
+
+       if (spa->spa_load_thread != NULL &&
+           spa->spa_load_thread != curthread) {
+               cv_wait(&spa_namespace_cv, &spa_namespace_lock);
+               goto retry;
+       }
 
        return (spa);
 }
@@ -728,6 +738,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
        spa_config_lock_init(spa);
        spa_stats_init(spa);
 
+       ASSERT(MUTEX_HELD(&spa_namespace_lock));
        avl_add(&spa_namespace_avl, spa);
 
        /*
@@ -826,7 +837,6 @@ spa_remove(spa_t *spa)
        nvlist_free(spa->spa_config_splitting);
 
        avl_remove(&spa_namespace_avl, spa);
-       cv_broadcast(&spa_namespace_cv);
 
        if (spa->spa_root)
                spa_strfree(spa->spa_root);
@@ -920,7 +930,8 @@ void
 spa_open_ref(spa_t *spa, const void *tag)
 {
        ASSERT(zfs_refcount_count(&spa->spa_refcount) >= spa->spa_minref ||
-           MUTEX_HELD(&spa_namespace_lock));
+           MUTEX_HELD(&spa_namespace_lock) ||
+           spa->spa_load_thread == curthread);
        (void) zfs_refcount_add(&spa->spa_refcount, tag);
 }
 
@@ -932,7 +943,8 @@ void
 spa_close(spa_t *spa, const void *tag)
 {
        ASSERT(zfs_refcount_count(&spa->spa_refcount) > spa->spa_minref ||
-           MUTEX_HELD(&spa_namespace_lock));
+           MUTEX_HELD(&spa_namespace_lock) ||
+           spa->spa_load_thread == curthread);
        (void) zfs_refcount_remove(&spa->spa_refcount, tag);
 }
 
index 5aaef1a69986ff0f41b8f0127c7df60a323a57f9..c5e16af16692e6c72f97e04c9020888734507261 100644 (file)
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright (c) 2016, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2016, 2024 by Delphix. All rights reserved.
  */
 
 #include <sys/spa.h>
@@ -775,7 +775,8 @@ vdev_initialize_stop_all(vdev_t *vd, vdev_initializing_state_t tgt_state)
 void
 vdev_initialize_restart(vdev_t *vd)
 {
-       ASSERT(MUTEX_HELD(&spa_namespace_lock));
+       ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
+           vd->vdev_spa->spa_load_thread == curthread);
        ASSERT(!spa_config_held(vd->vdev_spa, SCL_ALL, RW_WRITER));
 
        if (vd->vdev_leaf_zap != 0) {
index 6503390f79732f4c9d856755e7645c5a392a6d19..00ebd4c9fca4dcd987782559e267a5deda5a597c 100644 (file)
@@ -23,6 +23,7 @@
  * Copyright (c) 2018, Intel Corporation.
  * Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
  * Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
+ * Copyright (c) 2024 by Delphix. All rights reserved.
  */
 
 #include <sys/vdev_impl.h>
@@ -1071,7 +1072,8 @@ vdev_rebuild_restart_impl(vdev_t *vd)
 void
 vdev_rebuild_restart(spa_t *spa)
 {
-       ASSERT(MUTEX_HELD(&spa_namespace_lock));
+       ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
+           spa->spa_load_thread == curthread);
 
        vdev_rebuild_restart_impl(spa->spa_root_vdev);
 }
index 7e3c5f684703da162d0289d39a2ceded6b2537b7..9753d5a1ea04ece113f3692f99d8579af674ecdc 100644 (file)
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright (c) 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2016, 2024 by Delphix. All rights reserved.
  * Copyright (c) 2019 by Lawrence Livermore National Security, LLC.
  * Copyright (c) 2021 Hewlett Packard Enterprise Development LP
  * Copyright 2023 RackTop Systems, Inc.
@@ -1148,7 +1148,8 @@ vdev_trim_stop_all(vdev_t *vd, vdev_trim_state_t tgt_state)
 void
 vdev_trim_restart(vdev_t *vd)
 {
-       ASSERT(MUTEX_HELD(&spa_namespace_lock));
+       ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
+           vd->vdev_spa->spa_load_thread == curthread);
        ASSERT(!spa_config_held(vd->vdev_spa, SCL_ALL, RW_WRITER));
 
        if (vd->vdev_leaf_zap != 0) {
@@ -1568,8 +1569,8 @@ vdev_autotrim_stop_all(spa_t *spa)
 void
 vdev_autotrim_restart(spa_t *spa)
 {
-       ASSERT(MUTEX_HELD(&spa_namespace_lock));
-
+       ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
+           spa->spa_load_thread == curthread);
        if (spa->spa_autotrim)
                vdev_autotrim(spa);
 }
index 1af2c26f8a439ec5bcfc38487492dab281025153..3773e400d7997dddf965c4332b4861db40b59d7d 100644 (file)
@@ -22,6 +22,7 @@
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
  * Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2024, Klara Inc.
  */
 
 /*
@@ -59,6 +60,7 @@ uint32_t zio_injection_enabled = 0;
 typedef struct inject_handler {
        int                     zi_id;
        spa_t                   *zi_spa;
+       char                    *zi_spa_name; /* ZINJECT_DELAY_IMPORT only */
        zinject_record_t        zi_record;
        uint64_t                *zi_lanes;
        int                     zi_next_lane;
@@ -703,6 +705,63 @@ zio_handle_io_delay(zio_t *zio)
        return (min_target);
 }
 
+/*
+ * Apply the one-shot pool delay handler of type 'command'
+ * (ZINJECT_DELAY_IMPORT or ZINJECT_DELAY_EXPORT) registered under this
+ * pool's name, if there is one.
+ *
+ *   spa     - pool being operated on; handlers are matched by spa_name()
+ *   elapsed - nanoseconds to deduct from the handler's zi_duration
+ *   command - handler type to look for
+ *
+ * The first matching handler is used: the caller sleeps for whatever is
+ * left of the requested duration and the handler is then cleared, even if
+ * nothing was left to sleep for.
+ */
+static void
+zio_handle_pool_delay(spa_t *spa, hrtime_t elapsed, zinject_type_t command)
+{
+       inject_handler_t *handler;
+       hrtime_t delay = 0;
+       int id = 0;
+
+       rw_enter(&inject_lock, RW_READER);
+
+       for (handler = list_head(&inject_handlers); handler != NULL;
+           handler = list_next(&inject_handlers, handler)) {
+               /*
+                * Skip handlers of other types instead of ending the walk
+                * at the first one; otherwise an unrelated handler ahead of
+                * ours in the list would hide the delay.
+                */
+               if (handler->zi_record.zi_cmd != command)
+                       continue;
+
+               ASSERT3P(handler->zi_spa_name, !=, NULL);
+               if (strcmp(spa_name(spa), handler->zi_spa_name) == 0) {
+                       uint64_t pause =
+                           SEC2NSEC(handler->zi_record.zi_duration);
+                       if (pause > elapsed) {
+                               delay = pause - elapsed;
+                       }
+                       id = handler->zi_id;
+                       break;
+               }
+       }
+
+       rw_exit(&inject_lock);
+
+       if (delay) {
+               if (command == ZINJECT_DELAY_IMPORT) {
+                       spa_import_progress_set_notes(spa, "injecting %llu "
+                           "sec delay", (u_longlong_t)NSEC2SEC(delay));
+               }
+               zfs_sleep_until(gethrtime() + delay);
+       }
+       if (id) {
+               /* all done with this one-shot handler */
+               zio_clear_fault(id);
+       }
+}
+
+/*
+ * For testing, inject a delay during an import.
+ *
+ * Thin wrapper: calls zio_handle_pool_delay() with ZINJECT_DELAY_IMPORT,
+ * passing spa and elapsed through unchanged.
+ */
+void
+zio_handle_import_delay(spa_t *spa, hrtime_t elapsed)
+{
+       zio_handle_pool_delay(spa, elapsed, ZINJECT_DELAY_IMPORT);
+}
+
+/*
+ * For testing, inject a delay during an export.
+ *
+ * Thin wrapper: calls zio_handle_pool_delay() with ZINJECT_DELAY_EXPORT,
+ * passing spa and elapsed through unchanged.
+ */
+void
+zio_handle_export_delay(spa_t *spa, hrtime_t elapsed)
+{
+       zio_handle_pool_delay(spa, elapsed, ZINJECT_DELAY_EXPORT);
+}
+
 static int
 zio_calculate_range(const char *pool, zinject_record_t *record)
 {
@@ -760,6 +819,28 @@ zio_calculate_range(const char *pool, zinject_record_t *record)
        return (0);
 }
 
+/*
+ * Return B_TRUE if inject_handlers already contains a handler of type
+ * 'command' for the pool called 'name', B_FALSE otherwise.  A handler's
+ * pool name is zi_spa_name when that is set, else spa_name(zi_spa).
+ * The list is walked with inject_lock held as reader.
+ */
+static boolean_t
+zio_pool_handler_exists(const char *name, zinject_type_t command)
+{
+       inject_handler_t *ih;
+       boolean_t found = B_FALSE;
+
+       rw_enter(&inject_lock, RW_READER);
+       ih = list_head(&inject_handlers);
+       while (ih != NULL) {
+               if (ih->zi_record.zi_cmd == command) {
+                       const char *owner;
+
+                       if (ih->zi_spa_name == NULL)
+                               owner = spa_name(ih->zi_spa);
+                       else
+                               owner = ih->zi_spa_name;
+
+                       if (strcmp(name, owner) == 0) {
+                               found = B_TRUE;
+                               break;
+                       }
+               }
+               ih = list_next(&inject_handlers, ih);
+       }
+       rw_exit(&inject_lock);
+
+       return (found);
+}
 /*
  * Create a new handler for the given record.  We add it to the list, adding
  * a reference to the spa_t in the process.  We increment zio_injection_enabled,
@@ -810,16 +891,42 @@ zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record)
 
        if (!(flags & ZINJECT_NULL)) {
                /*
-                * spa_inject_ref() will add an injection reference, which will
-                * prevent the pool from being removed from the namespace while
-                * still allowing it to be unloaded.
+                * Pool delays for import or export don't take an
+                * injection reference on the spa. Instead they
+                * rely on matching by name.
                 */
-               if ((spa = spa_inject_addref(name)) == NULL)
-                       return (SET_ERROR(ENOENT));
+               if (record->zi_cmd == ZINJECT_DELAY_IMPORT ||
+                   record->zi_cmd == ZINJECT_DELAY_EXPORT) {
+                       if (record->zi_duration <= 0)
+                               return (SET_ERROR(EINVAL));
+                       /*
+                        * Only one import | export delay handler per pool.
+                        */
+                       if (zio_pool_handler_exists(name, record->zi_cmd))
+                               return (SET_ERROR(EEXIST));
+
+                       mutex_enter(&spa_namespace_lock);
+                       boolean_t has_spa = spa_lookup(name) != NULL;
+                       mutex_exit(&spa_namespace_lock);
+
+                       if (record->zi_cmd == ZINJECT_DELAY_IMPORT && has_spa)
+                               return (SET_ERROR(EEXIST));
+                       if (record->zi_cmd == ZINJECT_DELAY_EXPORT && !has_spa)
+                               return (SET_ERROR(ENOENT));
+                       spa = NULL;
+               } else {
+                       /*
+                        * spa_inject_addref() will add an injection reference,
+                        * which will prevent the pool from being removed
+                        * from the namespace while still allowing it to be
+                        * unloaded.
+                        */
+                       if ((spa = spa_inject_addref(name)) == NULL)
+                               return (SET_ERROR(ENOENT));
+               }
 
                handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP);
-
-               handler->zi_spa = spa;
+               handler->zi_spa = spa;  /* note: can be NULL */
                handler->zi_record = *record;
 
                if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) {
@@ -832,6 +939,11 @@ zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record)
                        handler->zi_next_lane = 0;
                }
 
+               if (handler->zi_spa == NULL)
+                       handler->zi_spa_name = spa_strdup(name);
+               else
+                       handler->zi_spa_name = NULL;
+
                rw_enter(&inject_lock, RW_WRITER);
 
                /*
@@ -891,7 +1003,11 @@ zio_inject_list_next(int *id, char *name, size_t buflen,
        if (handler) {
                *record = handler->zi_record;
                *id = handler->zi_id;
-               (void) strlcpy(name, spa_name(handler->zi_spa), buflen);
+               ASSERT(handler->zi_spa || handler->zi_spa_name);
+               if (handler->zi_spa != NULL)
+                       (void) strlcpy(name, spa_name(handler->zi_spa), buflen);
+               else
+                       (void) strlcpy(name, handler->zi_spa_name, buflen);
                ret = 0;
        } else {
                ret = SET_ERROR(ENOENT);
@@ -941,7 +1057,11 @@ zio_clear_fault(int id)
                ASSERT3P(handler->zi_lanes, ==, NULL);
        }
 
-       spa_inject_delref(handler->zi_spa);
+       if (handler->zi_spa_name != NULL)
+               spa_strfree(handler->zi_spa_name);
+
+       if (handler->zi_spa != NULL)
+               spa_inject_delref(handler->zi_spa);
        kmem_free(handler, sizeof (inject_handler_t));
        atomic_dec_32(&zio_injection_enabled);
 
index 558cd425afd81ad35b90829de6395b86e5e606af..0586d991b8028be40b3d677581f2a9a1016df942 100644 (file)
@@ -466,7 +466,8 @@ tests = ['zpool_import_001_pos', 'zpool_import_002_pos',
     'import_paths_changed',
     'import_rewind_config_changed',
     'import_rewind_device_replaced',
-    'zpool_import_status']
+    'zpool_import_status', 'zpool_import_parallel_pos',
+    'zpool_import_parallel_neg', 'zpool_import_parallel_admin']
 tags = ['functional', 'cli_root', 'zpool_import']
 timeout = 1200
 
index f182a2825cd6c76cf386383caee6122106c4100d..dc447e0422259f165afad27f1423e2619a61c813 100644 (file)
@@ -1144,6 +1144,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
        functional/cli_root/zpool_import/zpool_import_missing_003_pos.ksh \
        functional/cli_root/zpool_import/zpool_import_rename_001_pos.ksh \
        functional/cli_root/zpool_import/zpool_import_status.ksh \
+       functional/cli_root/zpool_import/zpool_import_parallel_admin.ksh \
+       functional/cli_root/zpool_import/zpool_import_parallel_neg.ksh \
+       functional/cli_root/zpool_import/zpool_import_parallel_pos.ksh \
        functional/cli_root/zpool_initialize/cleanup.ksh \
        functional/cli_root/zpool_initialize/zpool_initialize_attach_detach_add_remove.ksh \
        functional/cli_root/zpool_initialize/zpool_initialize_fault_export_import_online.ksh \
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_parallel_admin.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_parallel_admin.ksh
new file mode 100755 (executable)
index 0000000..c681d1b
--- /dev/null
@@ -0,0 +1,165 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2023 Klara, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg
+. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib
+
+#
+# DESCRIPTION:
+#      Verify that admin commands to a different pool are not blocked by import
+#
+# STRATEGY:
+#      1. Create 2 pools
+#      2. Export one of the pools
+#      3. Import the pool with an injected delay
+#      4. Execute some admin commands against both pools
+#      5. Verify that the admin commands to the pool not being imported don't stall
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+       zinject -c all
+       destroy_pool $TESTPOOL1
+       destroy_pool $TESTPOOL2
+}
+
+function pool_import
+{
+       typeset dir=$1
+       typeset pool=$2
+
+       SECONDS=0
+       errmsg=$(zpool import -d $dir -f $pool 2>&1 > /dev/null)
+       if [[ $? -eq 0 ]]; then
+               echo ${pool}: imported in $SECONDS secs
+               echo $SECONDS > ${DEVICE_DIR}/${pool}-import
+       else
+               echo ${pool}: import failed $errmsg in $SECONDS secs
+       fi
+}
+
+function pool_add_device
+{
+       typeset pool=$1
+       typeset device=$2
+       typeset devtype=$3
+
+       SECONDS=0
+       errmsg=$(zpool add $pool $devtype $device 2>&1 > /dev/null)
+       if [[ $? -eq 0 ]]; then
+               echo ${pool}: added $devtype vdev in $SECONDS secs
+               echo $SECONDS > ${DEVICE_DIR}/${pool}-add
+       else
+               echo ${pool}: add $devtype vdev failed ${errmsg}, in $SECONDS secs
+       fi
+}
+
+function pool_stats
+{
+       typeset stats=$1
+       typeset pool=$2
+
+       SECONDS=0
+       errmsg=$(zpool $stats $pool 2>&1 > /dev/null)
+       if [[ $? -eq 0 ]]; then
+               echo ${pool}: $stats in $SECONDS secs
+               echo $SECONDS > ${DEVICE_DIR}/${pool}-${stats}
+       else
+               echo ${pool}: $stats failed ${errmsg}, in $SECONDS secs
+       fi
+}
+
+function pool_create
+{
+       typeset pool=$1
+       typeset device=$2
+
+       SECONDS=0
+       errmsg=$(zpool create $pool $device 2>&1 > /dev/null)
+       if [[ $? -eq 0 ]]; then
+               echo ${pool}: created in $SECONDS secs
+               echo $SECONDS > ${DEVICE_DIR}/${pool}-create
+       else
+               echo ${pool}: create failed ${errmsg}, in $SECONDS secs
+       fi
+}
+
+log_assert "Simple admin commands to different pool not blocked by import"
+
+log_onexit cleanup
+
+#
+# create two pools and export one
+#
+log_must zpool create $TESTPOOL1 $VDEV0
+log_must zpool export $TESTPOOL1
+log_must zpool create $TESTPOOL2 $VDEV1
+
+#
+# import pool asynchronously with an injected 10 second delay
+#
+log_must zinject -P import -s 10 $TESTPOOL1
+pool_import $DEVICE_DIR $TESTPOOL1 &
+
+sleep 2
+
+#
+# run some admin commands on the pools while the import is in progress
+#
+
+pool_add_device $TESTPOOL1 $VDEV2 "log" &
+pool_add_device $TESTPOOL2 $VDEV3 "cache" &
+pool_stats "status" $TESTPOOL1 &
+pool_stats "status" $TESTPOOL2 &
+pool_stats "list" $TESTPOOL1 &
+pool_stats "list" $TESTPOOL2 &
+pool_create $TESTPOOL1 $VDEV4 &
+wait
+
+log_must zpool sync $TESTPOOL1 $TESTPOOL2
+
+zpool history $TESTPOOL1
+zpool history $TESTPOOL2
+
+log_must test "5" -lt $(<${DEVICE_DIR}/${TESTPOOL1}-import)
+
+#
+# verify that commands to second pool did not wait for import to finish
+#
+log_must test "2" -gt $(<${DEVICE_DIR}/${TESTPOOL2}-status)
+log_must test "2" -gt $(<${DEVICE_DIR}/${TESTPOOL2}-list)
+log_must test "2" -gt $(<${DEVICE_DIR}/${TESTPOOL2}-add)
+[[ -e ${DEVICE_DIR}/${TESTPOOL1}-create ]] && log_fail "unexpected pool create"
+
+log_pass "Simple admin commands to different pool not blocked by import"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_parallel_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_parallel_neg.ksh
new file mode 100755 (executable)
index 0000000..339dc25
--- /dev/null
@@ -0,0 +1,130 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2023 Klara, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg
+. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib
+
+#
+# DESCRIPTION:
+#      Verify that pool imports by same name only have one winner
+#
+# STRATEGY:
+#      1. Create 4 single disk pools with the same name
+#      2. Generate some ZIL records (for a longer import)
+#      3. Export the pools
+#      4. Import the pools in parallel
+#      5. Repeat with using matching guids
+#
+
+verify_runnable "global"
+
+POOLNAME="import_pool"
+DEV_DIR_PREFIX="$DEVICE_DIR/$POOLNAME"
+VDEVSIZE=$((512 * 1024 * 1024))
+
+log_assert "parallel pool imports by same name only have one winner"
+
+# each pool has its own device directory
+for i in {0..3}; do
+       log_must mkdir -p ${DEV_DIR_PREFIX}$i
+       log_must truncate -s $VDEVSIZE ${DEV_DIR_PREFIX}$i/${DEVICE_FILE}$i
+done
+
+function cleanup
+{
+       zinject -c all
+       log_must set_tunable64 KEEP_LOG_SPACEMAPS_AT_EXPORT 0
+       log_must set_tunable64 METASLAB_DEBUG_LOAD 0
+
+       destroy_pool $POOLNAME
+
+       log_must rm -rf $DEV_DIR_PREFIX*
+}
+
+log_onexit cleanup
+
+log_must set_tunable64 KEEP_LOG_SPACEMAPS_AT_EXPORT 1
+log_must set_tunable64 METASLAB_DEBUG_LOAD 1
+
+function import_pool
+{
+       typeset dir=$1
+       typeset pool=$2
+       typeset newname=$3
+
+       SECONDS=0
+       errmsg=$(zpool import -N -d $dir -f $pool $newname 2>&1 > /dev/null)
+       if [[ $? -eq 0 ]]; then
+               touch $dir/imported
+               echo "imported $pool in $SECONDS secs"
+       elif [[ $errmsg == *"cannot import"* ]]; then
+               echo "pool import failed: $errmsg, waited $SECONDS secs"
+               touch $dir/failed
+       fi
+}
+
+#
+# create four exported pools with the same name
+#
+for i in {0..3}; do
+       log_must zpool create $POOLNAME ${DEV_DIR_PREFIX}$i/${DEVICE_FILE}$i
+       log_must zpool export $POOLNAME
+done
+log_must zinject -P import -s 10 $POOLNAME
+
+#
+# import the pools in parallel, expecting only one winner
+#
+for i in {0..3}; do
+       import_pool ${DEV_DIR_PREFIX}$i $POOLNAME &
+done
+wait
+
+# check the result of background imports
+typeset num_imports=0
+typeset num_cannot=0
+for i in {0..3}; do
+       if [[ -f ${DEV_DIR_PREFIX}$i/imported ]]; then
+               ((num_imports += 1))
+       fi
+       if [[ -f ${DEV_DIR_PREFIX}$i/failed ]]; then
+               ((num_cannot += 1))
+               loser=$i
+       fi
+done
+[[ $num_imports -eq "1" ]] || log_fail "expecting an import"
+[[ $num_cannot -eq "3" ]] || \
+    log_fail "expecting 3 pool exists errors, found $num_cannot"
+
+log_note "$num_imports imported and $num_cannot failed (expected)"
+
+log_pass "parallel pool imports by same name only have one winner"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_parallel_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_parallel_pos.ksh
new file mode 100755 (executable)
index 0000000..71b2437
--- /dev/null
@@ -0,0 +1,137 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2023 Klara, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg
+. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib
+
+# test uses 8 vdevs
+export MAX_NUM=8
+
+#
+# DESCRIPTION:
+#      Verify that pool imports can occur in parallel
+#
+# STRATEGY:
+#      1. Create 8 pools
+#      2. Generate some ZIL records
+#      3. Export the pools
+#      4. Import half of the pools synchronously to baseline sequential cost
+#      5. Import the other half asynchronously to demonstrate parallel savings
+#      6. Export 4 pools
+#      7. Test zpool import -a
+#
+
+verify_runnable "global"
+
+#
+# override the minimum sized vdevs
+#
+VDEVSIZE=$((512 * 1024 * 1024))
+increase_device_sizes $VDEVSIZE
+
+POOLNAME="import_pool"
+
+function cleanup
+{
+       zinject -c all
+       log_must set_tunable64 KEEP_LOG_SPACEMAPS_AT_EXPORT 0
+       log_must set_tunable64 METASLAB_DEBUG_LOAD 0
+
+       for i in {0..$(($MAX_NUM - 1))}; do
+               destroy_pool $POOLNAME-$i
+       done
+       # reset the devices
+       increase_device_sizes 0
+       increase_device_sizes $FILE_SIZE
+}
+
+log_assert "Pool imports can occur in parallel"
+
+log_onexit cleanup
+
+log_must set_tunable64 KEEP_LOG_SPACEMAPS_AT_EXPORT 1
+log_must set_tunable64 METASLAB_DEBUG_LOAD 1
+
+
+#
+# create some exported pools with import delay injectors
+#
+for i in {0..$(($MAX_NUM - 1))}; do
+       log_must zpool create $POOLNAME-$i $DEVICE_DIR/${DEVICE_FILE}$i
+       log_must zpool export $POOLNAME-$i
+       log_must zinject -P import -s 12 $POOLNAME-$i
+done
+wait
+
+#
+# import half of the pools synchronously
+#
+SECONDS=0
+for i in {0..3}; do
+       log_must zpool import -d $DEVICE_DIR -f $POOLNAME-$i
+done
+sequential_time=$SECONDS
+log_note "sequentially imported 4 pools in $sequential_time seconds"
+
+#
+# import half of the pools in parallel
+#
+SECONDS=0
+for i in {4..7}; do
+       log_must zpool import -d $DEVICE_DIR -f $POOLNAME-$i &
+done
+wait
+parallel_time=$SECONDS
+log_note "asyncronously imported 4 pools in $parallel_time seconds"
+
+log_must test $parallel_time -lt $(($sequential_time / 3))
+
+#
+# export pools with import delay injectors
+#
+for i in {4..7}; do
+       log_must zpool export $POOLNAME-$i
+       log_must zinject -P import -s 12 $POOLNAME-$i
+done
+wait
+
+#
+# now test zpool import -a
+#
+SECONDS=0
+log_must zpool import -a -d $DEVICE_DIR -f
+parallel_time=$SECONDS
+log_note "asyncronously imported 4 pools in $parallel_time seconds"
+
+log_must test $parallel_time -lt $(($sequential_time / 3))
+
+log_pass "Pool imports occur in parallel"