git.proxmox.com Git - mirror_zfs.git/commitdiff
vdev probe to slow disk can stall mmp write checker
author Don Brady <don.brady@delphix.com>
Mon, 29 Apr 2024 21:35:53 +0000 (15:35 -0600)
committer Tony Hutter <hutter2@llnl.gov>
Tue, 30 Apr 2024 17:01:15 +0000 (10:01 -0700)
Simplify vdev probes in the zio_vdev_io_done context to
avoid holding the spa config lock for a long duration.

Also allow 'zpool clear' to resume a suspended pool when there is
no evidence that another host is using the pool.

Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Reviewed-by: Olaf Faaland <faaland1@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Don Brady <don.brady@klarasystems.com>
Closes #15839

16 files changed:
cmd/zpool/zpool_main.c
include/sys/spa.h
include/sys/uberblock_impl.h
include/sys/vdev_impl.h
man/man8/zpool-clear.8
module/zfs/mmp.c
module/zfs/spa.c
module/zfs/txg.c
module/zfs/vdev.c
module/zfs/vdev_label.c
module/zfs/zfs_ioctl.c
module/zfs/zio.c
module/zfs/zio_inject.c
tests/runfiles/linux.run
tests/zfs-tests/tests/Makefile.am
tests/zfs-tests/tests/functional/mmp/mmp_write_slow_disk.ksh [new file with mode: 0755]

index 20b1c85065b46e0bef57ed43ea0d40ae74b97062..ed0b8d7a12d75a7c8745311fb53d0b82db06a755 100644 (file)
@@ -8910,7 +8910,7 @@ status_callback(zpool_handle_t *zhp, void *data)
                printf_color(ANSI_BOLD, gettext("action: "));
                printf_color(ANSI_YELLOW, gettext("Make sure the pool's devices"
                    " are connected, then reboot your system and\n\timport the "
-                   "pool.\n"));
+                   "pool or run 'zpool clear' to resume the pool.\n"));
                break;
 
        case ZPOOL_STATUS_IO_FAILURE_WAIT:
index 3112ba09ceb46ee9178f1dc30ad8881c0a1263cb..6611141b95698d887739e968ab3da249bf37f066 100644 (file)
@@ -769,7 +769,7 @@ extern int bpobj_enqueue_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
 
 #define        SPA_ASYNC_CONFIG_UPDATE                 0x01
 #define        SPA_ASYNC_REMOVE                        0x02
-#define        SPA_ASYNC_PROBE                         0x04
+#define        SPA_ASYNC_FAULT_VDEV                    0x04
 #define        SPA_ASYNC_RESILVER_DONE                 0x08
 #define        SPA_ASYNC_RESILVER                      0x10
 #define        SPA_ASYNC_AUTOEXPAND                    0x20
@@ -1113,6 +1113,8 @@ extern uint32_t spa_get_hostid(spa_t *spa);
 extern void spa_activate_allocation_classes(spa_t *, dmu_tx_t *);
 extern boolean_t spa_livelist_delete_check(spa_t *spa);
 
+extern boolean_t spa_mmp_remote_host_activity(spa_t *spa);
+
 extern spa_mode_t spa_mode(spa_t *spa);
 extern uint64_t zfs_strtonum(const char *str, char **nptr);
 
index 03bcfa8f4dd1576dffb7e62b66d42c0abc36d217..13fce9c29e2d38dc25e3ba922acbb1893efbacc4 100644 (file)
@@ -50,20 +50,20 @@ extern "C" {
 #define        MMP_SEQ_VALID_BIT       0x02
 #define        MMP_FAIL_INT_VALID_BIT  0x04
 
-#define        MMP_VALID(ubp)          (ubp->ub_magic == UBERBLOCK_MAGIC && \
-                                   ubp->ub_mmp_magic == MMP_MAGIC)
-#define        MMP_INTERVAL_VALID(ubp) (MMP_VALID(ubp) && (ubp->ub_mmp_config & \
+#define        MMP_VALID(ubp)          ((ubp)->ub_magic == UBERBLOCK_MAGIC && \
+                                   (ubp)->ub_mmp_magic == MMP_MAGIC)
+#define        MMP_INTERVAL_VALID(ubp) (MMP_VALID(ubp) && ((ubp)->ub_mmp_config & \
                                    MMP_INTERVAL_VALID_BIT))
-#define        MMP_SEQ_VALID(ubp)      (MMP_VALID(ubp) && (ubp->ub_mmp_config & \
+#define        MMP_SEQ_VALID(ubp)      (MMP_VALID(ubp) && ((ubp)->ub_mmp_config & \
                                    MMP_SEQ_VALID_BIT))
-#define        MMP_FAIL_INT_VALID(ubp) (MMP_VALID(ubp) && (ubp->ub_mmp_config & \
+#define        MMP_FAIL_INT_VALID(ubp) (MMP_VALID(ubp) && ((ubp)->ub_mmp_config & \
                                    MMP_FAIL_INT_VALID_BIT))
 
-#define        MMP_INTERVAL(ubp)       ((ubp->ub_mmp_config & 0x00000000FFFFFF00) \
+#define        MMP_INTERVAL(ubp)       (((ubp)->ub_mmp_config & 0x00000000FFFFFF00) \
                                    >> 8)
-#define        MMP_SEQ(ubp)            ((ubp->ub_mmp_config & 0x0000FFFF00000000) \
+#define        MMP_SEQ(ubp)            (((ubp)->ub_mmp_config & 0x0000FFFF00000000) \
                                    >> 32)
-#define        MMP_FAIL_INT(ubp)       ((ubp->ub_mmp_config & 0xFFFF000000000000) \
+#define        MMP_FAIL_INT(ubp)       (((ubp)->ub_mmp_config & 0xFFFF000000000000) \
                                    >> 48)
 
 #define        MMP_INTERVAL_SET(write) \
index 02948894c3657998a122f668c8b9a05bafbffd2d..8c6ab316fa184aa99dba2aa4e6ac9b8da97b0e9e 100644 (file)
@@ -274,7 +274,7 @@ struct vdev {
        txg_list_t      vdev_dtl_list;  /* per-txg dirty DTL lists      */
        txg_node_t      vdev_txg_node;  /* per-txg dirty vdev linkage   */
        boolean_t       vdev_remove_wanted; /* async remove wanted?     */
-       boolean_t       vdev_probe_wanted; /* async probe wanted?       */
+       boolean_t       vdev_fault_wanted; /* async fault wanted?       */
        list_node_t     vdev_config_dirty_node; /* config dirty list    */
        list_node_t     vdev_state_dirty_node; /* state dirty list      */
        uint64_t        vdev_deflate_ratio; /* deflation ratio (x512)   */
index c61ecae483ac1cbe5779c923308cdfb54102513b..3e448be87fc25f69c08821618a930431da1c74db 100644 (file)
@@ -50,9 +50,10 @@ If the pool was suspended it will be brought back online provided the
 devices can be accessed.
 Pools with
 .Sy multihost
-enabled which have been suspended cannot be resumed.
-While the pool was suspended, it may have been imported on
-another host, and resuming I/O could result in pool damage.
+enabled which have been suspended cannot be resumed when there is evidence
+that the pool was imported by another host.
+The same checks performed during an import will be applied before the clear
+proceeds.
 .Bl -tag -width Ds
 .It Fl -power
 Power on the devices's slot in the storage enclosure and wait for the device
index 25eea0752941449f17eef1a94f60f9a184451da2..8144d89650853e27ec551800f99ae93d0ad642c7 100644 (file)
@@ -663,12 +663,13 @@ mmp_thread(void *arg)
                    (gethrtime() - mmp->mmp_last_write) > mmp_fail_ns) {
                        zfs_dbgmsg("MMP suspending pool '%s': gethrtime %llu "
                            "mmp_last_write %llu mmp_interval %llu "
-                           "mmp_fail_intervals %llu mmp_fail_ns %llu",
+                           "mmp_fail_intervals %llu mmp_fail_ns %llu txg %llu",
                            spa_name(spa), (u_longlong_t)gethrtime(),
                            (u_longlong_t)mmp->mmp_last_write,
                            (u_longlong_t)mmp_interval,
                            (u_longlong_t)mmp_fail_intervals,
-                           (u_longlong_t)mmp_fail_ns);
+                           (u_longlong_t)mmp_fail_ns,
+                           (u_longlong_t)spa->spa_uberblock.ub_txg);
                        cmn_err(CE_WARN, "MMP writes to pool '%s' have not "
                            "succeeded in over %llu ms; suspending pool. "
                            "Hrtime %llu",
index fba7846955fd8890a036e98e89180e11dc0903af..251dd8a4d1c717f4d049d728ec929f87edea29e1 100644 (file)
@@ -3542,11 +3542,16 @@ spa_activity_check_duration(spa_t *spa, uberblock_t *ub)
 }
 
 /*
- * Perform the import activity check.  If the user canceled the import or
- * we detected activity then fail.
+ * Remote host activity check.
+ *
+ * error results:
+ *          0 - no activity detected
+ *  EREMOTEIO - remote activity detected
+ *      EINTR - user canceled the operation
  */
 static int
-spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config)
+spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config,
+    boolean_t importing)
 {
        uint64_t txg = ub->ub_txg;
        uint64_t timestamp = ub->ub_timestamp;
@@ -3591,19 +3596,23 @@ spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config)
 
        import_expire = gethrtime() + import_delay;
 
-       spa_import_progress_set_notes(spa, "Checking MMP activity, waiting "
-           "%llu ms", (u_longlong_t)NSEC2MSEC(import_delay));
+       if (importing) {
+               spa_import_progress_set_notes(spa, "Checking MMP activity, "
+                   "waiting %llu ms", (u_longlong_t)NSEC2MSEC(import_delay));
+       }
 
-       int interations = 0;
+       int iterations = 0;
        while ((now = gethrtime()) < import_expire) {
-               if (interations++ % 30 == 0) {
+               if (importing && iterations++ % 30 == 0) {
                        spa_import_progress_set_notes(spa, "Checking MMP "
                            "activity, %llu ms remaining",
                            (u_longlong_t)NSEC2MSEC(import_expire - now));
                }
 
-               (void) spa_import_progress_set_mmp_check(spa_guid(spa),
-                   NSEC2SEC(import_expire - gethrtime()));
+               if (importing) {
+                       (void) spa_import_progress_set_mmp_check(spa_guid(spa),
+                           NSEC2SEC(import_expire - gethrtime()));
+               }
 
                vdev_uberblock_load(rvd, ub, &mmp_label);
 
@@ -3685,6 +3694,61 @@ out:
        return (error);
 }
 
+/*
+ * Called from zfs_ioc_clear for a multihost pool that was suspended after
+ * failing mmp write checks.  Returns B_FALSE only if every check below passes.
+ */
+boolean_t
+spa_mmp_remote_host_activity(spa_t *spa)
+{
+       ASSERT(spa_multihost(spa) && spa_suspended(spa));
+
+       nvlist_t *best_label;
+       uberblock_t best_ub;
+
+       /*
+        * Locate the best uberblock on disk
+        */
+       vdev_uberblock_load(spa->spa_root_vdev, &best_ub, &best_label);
+       if (best_label) {
+               /*
+                * the label's hostid, when present, must match our hostid
+                */
+               if (nvlist_exists(best_label, ZPOOL_CONFIG_HOSTID) &&
+                   spa_get_hostid(spa) !=
+                   fnvlist_lookup_uint64(best_label, ZPOOL_CONFIG_HOSTID)) {
+                       nvlist_free(best_label);
+                       return (B_TRUE);
+               }
+               nvlist_free(best_label);
+       } else {        /* no label returned; treat as unsafe */
+               return (B_TRUE);
+       }
+
+       if (!MMP_VALID(&best_ub) ||
+           !MMP_FAIL_INT_VALID(&best_ub) ||
+           MMP_FAIL_INT(&best_ub) == 0) {
+               return (B_TRUE);
+       }
+
+       if (best_ub.ub_txg != spa->spa_uberblock.ub_txg ||
+           best_ub.ub_timestamp != spa->spa_uberblock.ub_timestamp) {
+               zfs_dbgmsg("txg mismatch detected during pool clear "
+                   "txg %llu ub_txg %llu timestamp %llu ub_timestamp %llu",
+                   (u_longlong_t)spa->spa_uberblock.ub_txg,
+                   (u_longlong_t)best_ub.ub_txg,
+                   (u_longlong_t)spa->spa_uberblock.ub_timestamp,
+                   (u_longlong_t)best_ub.ub_timestamp);
+               return (B_TRUE);
+       }
+
+       /*
+        * Look for any remote writer; any nonzero result counts as activity
+        */
+       return (spa_activity_check(spa, &spa->spa_uberblock, spa->spa_config,
+           B_FALSE) != 0);
+}
+
 static int
 spa_verify_host(spa_t *spa, nvlist_t *mos_config)
 {
@@ -4005,7 +4069,8 @@ spa_ld_select_uberblock(spa_t *spa, spa_import_type_t type)
                        return (spa_vdev_err(rvd, VDEV_AUX_ACTIVE, EREMOTEIO));
                }
 
-               int error = spa_activity_check(spa, ub, spa->spa_config);
+               int error =
+                   spa_activity_check(spa, ub, spa->spa_config, B_TRUE);
                if (error) {
                        nvlist_free(label);
                        return (error);
@@ -8589,15 +8654,16 @@ spa_async_remove(spa_t *spa, vdev_t *vd)
 }
 
 static void
-spa_async_probe(spa_t *spa, vdev_t *vd)
+spa_async_fault_vdev(spa_t *spa, vdev_t *vd)
 {
-       if (vd->vdev_probe_wanted) {
-               vd->vdev_probe_wanted = B_FALSE;
-               vdev_reopen(vd);        /* vdev_open() does the actual probe */
+       if (vd->vdev_fault_wanted) {
+               vd->vdev_fault_wanted = B_FALSE;
+               vdev_set_state(vd, B_TRUE, VDEV_STATE_FAULTED,
+                   VDEV_AUX_ERR_EXCEEDED);
        }
 
        for (int c = 0; c < vd->vdev_children; c++)
-               spa_async_probe(spa, vd->vdev_child[c]);
+               spa_async_fault_vdev(spa, vd->vdev_child[c]);
 }
 
 static void
@@ -8685,11 +8751,11 @@ spa_async_thread(void *arg)
        }
 
        /*
-        * See if any devices need to be probed.
+        * See if any devices need to be marked faulted.
         */
-       if (tasks & SPA_ASYNC_PROBE) {
+       if (tasks & SPA_ASYNC_FAULT_VDEV) {
                spa_vdev_state_enter(spa, SCL_NONE);
-               spa_async_probe(spa, spa->spa_root_vdev);
+               spa_async_fault_vdev(spa, spa->spa_root_vdev);
                (void) spa_vdev_state_exit(spa, NULL, 0);
        }
 
index a67c043446f5de0d9927fab4e3b87e25875ade03..5ce6be69be14cc4d033a822e098a85d5baa37685 100644 (file)
@@ -550,6 +550,15 @@ txg_sync_thread(void *arg)
                        timer = (delta > timeout ? 0 : timeout - delta);
                }
 
+               /*
+                * When we're suspended, nothing should be changing and for
+                * MMP we don't want to bump anything that would make it
+                * harder to detect if another host is changing it when
+                * resuming after a MMP suspend.
+                */
+               if (spa_suspended(spa))
+                       continue;
+
                /*
                 * Wait until the quiesce thread hands off a txg to us,
                 * prompting it to do so if necessary.
index 6d8eb50a1a6e705ab896a792200e228ab50c20a1..981da4e986c4e6d785776d26fa803ad4cdac94c5 100644 (file)
@@ -1661,6 +1661,7 @@ vdev_metaslab_fini(vdev_t *vd)
 typedef struct vdev_probe_stats {
        boolean_t       vps_readable;
        boolean_t       vps_writeable;
+       boolean_t       vps_zio_done_probe;
        int             vps_flags;
 } vdev_probe_stats_t;
 
@@ -1704,6 +1705,17 @@ vdev_probe_done(zio_t *zio)
                        (void) zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE,
                            spa, vd, NULL, NULL, 0);
                        zio->io_error = SET_ERROR(ENXIO);
+
+                       /*
+                        * If this probe was initiated from zio pipeline, then
+                        * change the state in a spa_async_request. Probes that
+                        * were initiated from a vdev_open can change the state
+                        * as part of the open call.
+                        */
+                       if (vps->vps_zio_done_probe) {
+                               vd->vdev_fault_wanted = B_TRUE;
+                               spa_async_request(spa, SPA_ASYNC_FAULT_VDEV);
+                       }
                }
 
                mutex_enter(&vd->vdev_probe_lock);
@@ -1754,6 +1766,7 @@ vdev_probe(vdev_t *vd, zio_t *zio)
 
                vps->vps_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_PROBE |
                    ZIO_FLAG_DONT_AGGREGATE | ZIO_FLAG_TRYHARD;
+               vps->vps_zio_done_probe = (zio != NULL);
 
                if (spa_config_held(spa, SCL_ZIO, RW_WRITER)) {
                        /*
@@ -1780,15 +1793,6 @@ vdev_probe(vdev_t *vd, zio_t *zio)
                vd->vdev_probe_zio = pio = zio_null(NULL, spa, vd,
                    vdev_probe_done, vps,
                    vps->vps_flags | ZIO_FLAG_DONT_PROPAGATE);
-
-               /*
-                * We can't change the vdev state in this context, so we
-                * kick off an async task to do it on our behalf.
-                */
-               if (zio != NULL) {
-                       vd->vdev_probe_wanted = B_TRUE;
-                       spa_async_request(spa, SPA_ASYNC_PROBE);
-               }
        }
 
        if (zio != NULL)
index 737d8b33e1886e343a472a246bab466586c8e7bb..5c0e750c46142c6b40eca8686ca316422384e6b5 100644 (file)
@@ -1982,6 +1982,7 @@ retry:
        /*
         * If this isn't a resync due to I/O errors,
         * and nothing changed in this transaction group,
+        * and multihost protection isn't enabled,
         * and the vdev configuration hasn't changed,
         * then there's nothing to do.
         */
@@ -1989,7 +1990,8 @@ retry:
                boolean_t changed = uberblock_update(ub, spa->spa_root_vdev,
                    txg, spa->spa_mmp.mmp_delay);
 
-               if (!changed && list_is_empty(&spa->spa_config_dirty_list))
+               if (!changed && list_is_empty(&spa->spa_config_dirty_list) &&
+                   !spa_multihost(spa))
                        return (0);
        }
 
index ac8329185ccada4da4d394c95aa3e280d441b6aa..b0783711329331eaf3bcd62ea27b11b194238731 100644 (file)
@@ -5800,10 +5800,13 @@ zfs_ioc_clear(zfs_cmd_t *zc)
 
        /*
         * If multihost is enabled, resuming I/O is unsafe as another
-        * host may have imported the pool.
+        * host may have imported the pool. Check for remote activity.
         */
-       if (spa_multihost(spa) && spa_suspended(spa))
-               return (SET_ERROR(EINVAL));
+       if (spa_multihost(spa) && spa_suspended(spa) &&
+           spa_mmp_remote_host_activity(spa)) {
+               spa_close(spa, FTAG);
+               return (SET_ERROR(EREMOTEIO));
+       }
 
        spa_vdev_state_enter(spa, SCL_NONE);
 
index d0b4016237b937bf7088fa926d76ecfcf561fa0e..046e6d64c1a9b96786509bbc022ad94ad56e1fac 100644 (file)
@@ -2503,8 +2503,10 @@ zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t reason)
                    "failure and the failure mode property for this pool "
                    "is set to panic.", spa_name(spa));
 
-       cmn_err(CE_WARN, "Pool '%s' has encountered an uncorrectable I/O "
-           "failure and has been suspended.\n", spa_name(spa));
+       if (reason != ZIO_SUSPEND_MMP) {
+               cmn_err(CE_WARN, "Pool '%s' has encountered an uncorrectable "
+                   "I/O failure and has been suspended.\n", spa_name(spa));
+       }
 
        (void) zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL,
            NULL, NULL, 0);
index 609182f4a2cd0826db6c5b0070ff21f57e817090..66ad72fb88e9f094e98283de97046e0b2355d781 100644 (file)
@@ -605,9 +605,11 @@ zio_handle_io_delay(zio_t *zio)
                if (vd->vdev_guid != handler->zi_record.zi_guid)
                        continue;
 
+               /* also match on I/O type (e.g., -T read) */
                if (handler->zi_record.zi_iotype != ZIO_TYPES &&
-                   handler->zi_record.zi_iotype != zio->io_type)
-                               continue;
+                   handler->zi_record.zi_iotype != zio->io_type) {
+                       continue;
+               }
 
                /*
                 * Defensive; should never happen as the array allocation
index a0b74ef4a8c6daa8f42dfe5781c8b77e82356e05..92ce09ec6fcbc1200790b81d5f20649e5106bfc6 100644 (file)
@@ -146,7 +146,7 @@ tags = ['functional', 'mmap']
 tests = ['mmp_on_thread', 'mmp_on_uberblocks', 'mmp_on_off', 'mmp_interval',
     'mmp_active_import', 'mmp_inactive_import', 'mmp_exported_import',
     'mmp_write_uberblocks', 'mmp_reset_interval', 'multihost_history',
-    'mmp_on_zdb', 'mmp_write_distribution', 'mmp_hostid']
+    'mmp_on_zdb', 'mmp_write_distribution', 'mmp_hostid', 'mmp_write_slow_disk']
 tags = ['functional', 'mmp']
 
 [tests/functional/mount:Linux]
index 8befed077234543403e4fd7f1a578e92396e3120..cc66d762f3c212ec2610c75ea6fb4d0f883c5a3d 100644 (file)
@@ -1588,6 +1588,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
        functional/mmp/mmp_on_zdb.ksh \
        functional/mmp/mmp_reset_interval.ksh \
        functional/mmp/mmp_write_distribution.ksh \
+       functional/mmp/mmp_write_slow_disk.ksh \
        functional/mmp/mmp_write_uberblocks.ksh \
        functional/mmp/multihost_history.ksh \
        functional/mmp/setup.ksh \
diff --git a/tests/zfs-tests/tests/functional/mmp/mmp_write_slow_disk.ksh b/tests/zfs-tests/tests/functional/mmp/mmp_write_slow_disk.ksh
new file mode 100755 (executable)
index 0000000..8b11868
--- /dev/null
@@ -0,0 +1,97 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2024, Klara Inc
+#
+
+# DESCRIPTION:
+#      Verify that long VDEV probes do not cause MMP checks to suspend pool
+#      Note: without PR-15839 fix, this test will suspend the pool.
+#
+#      A device that is returning unexpected errors will trigger a vdev_probe.
+#      When the device additionally has slow response times, the probe can hold
+#      the spa config lock as a writer for a long period of time such that the
+#      mmp uberblock updates stall when trying to acquire the spa config lock.
+#
+# STRATEGY:
+#      1. Create a pool with multiple leaf vdevs
+#      2. Enable multihost and multihost_history
+#      3. Inject write errors and delays on one leaf, wait for MMP writes
+#      4. Verify that a long VDEV probe didn't cause MMP check to suspend pool
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/mmp/mmp.cfg
+. $STF_SUITE/tests/functional/mmp/mmp.kshlib
+
+verify_runnable "both"
+
+function cleanup
+{
+       log_must zinject -c all
+
+       if [[ $(zpool list -H -o health $MMP_POOL) == "SUSPENDED" ]]; then
+               log_must zpool clear $MMP_POOL
+               zpool get state $MMP_POOL $MMP_DIR/file.3
+               zpool events | grep ".fs.zfs." | grep -v "history_event"
+       fi
+
+       poolexists $MMP_POOL && destroy_pool $MMP_POOL
+       log_must rm -r $MMP_DIR
+       log_must mmp_clear_hostid
+}
+
+log_assert "A long VDEV probe doesn't cause a MMP check suspend"
+log_onexit cleanup
+
+MMP_HISTORY_URL=/proc/spl/kstat/zfs/$MMP_POOL/multihost
+
+# Create a multiple drive pool
+log_must zpool events -c
+log_must mkdir -p $MMP_DIR
+log_must truncate -s 128M $MMP_DIR/file.{0,1,2,3,4,5}
+log_must zpool create -f $MMP_POOL \
+       mirror $MMP_DIR/file.{0,1,2} \
+       mirror $MMP_DIR/file.{3,4,5}
+
+# Enable MMP
+log_must mmp_set_hostid $HOSTID1
+log_must zpool set multihost=on $MMP_POOL
+clear_mmp_history
+
+# Inject vdev write error along with a delay
+log_must zinject -f 33 -e io -L pad2 -T write -d $MMP_DIR/file.3 $MMP_POOL
+log_must zinject -f 50 -e io -L uber -T write -d $MMP_DIR/file.3 $MMP_POOL
+log_must zinject -D 2000:4 -T write -d $MMP_DIR/file.3 $MMP_POOL
+
+log_must dd if=/dev/urandom of=/$MMP_POOL/data bs=1M count=5
+sleep 10
+sync_pool $MMP_POOL
+
+# Confirm mmp writes to the non-slow disks have taken place
+for x in {0,1,2,4}; do
+       write_count=$(grep -c file.${x} $MMP_HISTORY_URL)
+       [[ $write_count -gt 0 ]] || log_fail "expecting mmp writes"
+done
+
+# Expect that the pool was not suspended
+log_must check_state $MMP_POOL "" "ONLINE"
+health=$(zpool list -H -o health $MMP_POOL)
+log_note "$MMP_POOL health is $health"
+[[ "$health" == "SUSPENDED" ]] && log_fail "$MMP_POOL $health unexpected"
+
+log_pass "A long VDEV probe doesn't cause a MMP check suspend"