* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
- * Copyright (c) 2024, Klara Inc.
+ * Copyright (c) 2023-2024, Klara Inc.
*/
/*
"\t\tcreate 3 lanes on the device; one lane with a latency\n"
"\t\tof 10 ms and two lanes with a 25 ms latency.\n"
"\n"
+ "\tzinject -P import|export -s <seconds> pool\n"
+ "\t\tAdd an artificial delay to a future pool import or export,\n"
+ "\t\tsuch that the operation takes a minimum of supplied seconds\n"
+ "\t\tto complete.\n"
+ "\n"
"\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
"\t\tCause the pool to stop writing blocks yet not\n"
"\t\treport errors for a duration. Simulates buggy hardware\n"
{
int *count = data;
- if (record->zi_guid != 0 || record->zi_func[0] != '\0')
+ if (record->zi_guid != 0 || record->zi_func[0] != '\0' ||
+ record->zi_duration != 0) {
return (0);
+ }
if (*count == 0) {
(void) printf("%3s %-15s %-6s %-6s %-8s %3s %-4s "
return (0);
}
+/*
+ * iter_handlers() callback that prints one table row for each pool import
+ * or export delay handler; handlers of any other type are skipped.  'data'
+ * points to the number of rows printed so far, which also determines
+ * whether the table header still has to be emitted.  Always returns 0.
+ */
+static int
+print_pool_delay_handler(int id, const char *pool, zinject_record_t *record,
+    void *data)
+{
+	int *nprinted = data;
+	const char *cmdname;
+
+	switch (record->zi_cmd) {
+	case ZINJECT_DELAY_IMPORT:
+		cmdname = "import";
+		break;
+	case ZINJECT_DELAY_EXPORT:
+		cmdname = "export";
+		break;
+	default:
+		/* not a pool delay handler */
+		return (0);
+	}
+
+	/* the first row is preceded by the column headings */
+	if ((*nprinted)++ == 0) {
+		(void) printf("%3s %-19s %-11s %s\n",
+		    "ID", "POOL", "DELAY (sec)", "COMMAND");
+		(void) printf("--- ------------------- -----------"
+		    " -------\n");
+	}
+
+	(void) printf("%3d %-19s %-11llu %s\n",
+	    id, pool, (u_longlong_t)record->zi_duration, cmdname);
+
+	return (0);
+}
+
/*
* Print all registered error handlers. Returns the number of handlers
* registered.
count = 0;
}
+ (void) iter_handlers(print_pool_delay_handler, &count);
+ if (count > 0) {
+ total += count;
+ (void) printf("\n");
+ count = 0;
+ }
+
(void) iter_handlers(print_panic_handler, &count);
return (count + total);
zc.zc_guid = flags;
if (zfs_ioctl(g_zfs, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
- (void) fprintf(stderr, "failed to add handler: %s\n",
- errno == EDOM ? "block level exceeds max level of object" :
- strerror(errno));
+ const char *errmsg = strerror(errno);
+
+ switch (errno) {
+ case EDOM:
+ errmsg = "block level exceeds max level of object";
+ break;
+ case EEXIST:
+ if (record->zi_cmd == ZINJECT_DELAY_IMPORT)
+ errmsg = "pool already imported";
+ if (record->zi_cmd == ZINJECT_DELAY_EXPORT)
+ errmsg = "a handler already exists";
+ break;
+ case ENOENT:
+ /* import delay injector running on older zfs module */
+ if (record->zi_cmd == ZINJECT_DELAY_IMPORT)
+ errmsg = "import delay injector not supported";
+ break;
+ default:
+ break;
+ }
+ (void) fprintf(stderr, "failed to add handler: %s\n", errmsg);
return (1);
}
} else if (record->zi_duration < 0) {
(void) printf(" txgs: %lld \n",
(u_longlong_t)-record->zi_duration);
+ } else if (record->zi_timer > 0) {
+ (void) printf(" timer: %lld ms\n",
+ (u_longlong_t)NSEC2MSEC(record->zi_timer));
} else {
(void) printf("objset: %llu\n",
(u_longlong_t)record->zi_objset);
}
while ((c = getopt(argc, argv,
- ":aA:b:C:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:")) != -1) {
+ ":aA:b:C:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:P:")) != -1) {
switch (c) {
case 'a':
flags |= ZINJECT_FLUSH_ARC;
sizeof (record.zi_func));
record.zi_cmd = ZINJECT_PANIC;
break;
+ case 'P':
+ if (strcasecmp(optarg, "import") == 0) {
+ record.zi_cmd = ZINJECT_DELAY_IMPORT;
+ } else if (strcasecmp(optarg, "export") == 0) {
+ record.zi_cmd = ZINJECT_DELAY_EXPORT;
+ } else {
+ (void) fprintf(stderr, "invalid command '%s': "
+ "must be 'import' or 'export'\n", optarg);
+ usage();
+ libzfs_fini(g_zfs);
+ return (1);
+ }
+ break;
case 'q':
quiet = 1;
break;
argc -= optind;
argv += optind;
- if (record.zi_duration != 0)
+ if (record.zi_duration != 0 && record.zi_cmd == 0)
record.zi_cmd = ZINJECT_IGNORED_WRITES;
if (cancel != NULL) {
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
level != 0 || device != NULL || record.zi_freq > 0 ||
dvas != 0) {
- (void) fprintf(stderr, "panic (-p) incompatible with "
- "other options\n");
+ (void) fprintf(stderr, "%s incompatible with other "
+ "options\n", "import|export delay (-P)");
usage();
libzfs_fini(g_zfs);
return (2);
if (argv[1] != NULL)
record.zi_type = atoi(argv[1]);
dataset[0] = '\0';
+ } else if (record.zi_cmd == ZINJECT_DELAY_IMPORT ||
+ record.zi_cmd == ZINJECT_DELAY_EXPORT) {
+ if (raw != NULL || range != NULL || type != TYPE_INVAL ||
+ level != 0 || device != NULL || record.zi_freq > 0 ||
+ dvas != 0) {
+ (void) fprintf(stderr, "%s incompatible with other "
+ "options\n", "import|export delay (-P)");
+ usage();
+ libzfs_fini(g_zfs);
+ return (2);
+ }
+
+ if (argc != 1 || record.zi_duration <= 0) {
+ (void) fprintf(stderr, "import|export delay (-P) "
+ "injection requires a duration (-s) and a single "
+ "pool name\n");
+ usage();
+ libzfs_fini(g_zfs);
+ return (2);
+ }
+
+ (void) strlcpy(pool, argv[0], sizeof (pool));
} else if (record.zi_cmd == ZINJECT_IGNORED_WRITES) {
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
level != 0 || record.zi_freq > 0 || dvas != 0) {
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <thread_pool.h>
#include <time.h>
#include <unistd.h>
#include <pwd.h>
return (ret);
}
+/*
+ * Arguments for a single do_import() call that import_pools() hands to a
+ * thread pool worker; consumed and freed by do_import_task().
+ */
+typedef struct import_parameters {
+ nvlist_t *ip_config; /* pool config passed to do_import() */
+ const char *ip_mntopts; /* mount options passed to do_import() */
+ nvlist_t *ip_props; /* properties passed to do_import() */
+ int ip_flags; /* flags passed to do_import() */
+ int *ip_err; /* do_import() result is OR'ed into *ip_err */
+} import_parameters_t;
+
+/*
+ * Thread pool task that imports one pool.  'arg' is an import_parameters_t
+ * allocated by the dispatcher; it is freed here once the import has been
+ * attempted.  A non-zero do_import() result is merged into *ip_err.
+ */
+static void
+do_import_task(void *arg)
+{
+	import_parameters_t *ip = arg;
+	int rc = do_import(ip->ip_config, NULL, ip->ip_mntopts,
+	    ip->ip_props, ip->ip_flags);
+
+	/*
+	 * All tasks dispatched by import_pools() point ip_err at the same
+	 * int and may run concurrently, so a plain read-modify-write could
+	 * overwrite a failure recorded by another task.  Merge atomically.
+	 */
+	if (rc != 0)
+		(void) __atomic_fetch_or(ip->ip_err, rc, __ATOMIC_SEQ_CST);
+
+	free(ip);
+}
+
+
static int
import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,
- char *orig_name, char *new_name,
- boolean_t do_destroyed, boolean_t pool_specified, boolean_t do_all,
- importargs_t *import)
+ char *orig_name, char *new_name, importargs_t *import)
{
nvlist_t *config = NULL;
nvlist_t *found_config = NULL;
uint64_t pool_state;
+ boolean_t pool_specified = (import->poolname != NULL ||
+ import->guid != 0);
+
+
+ tpool_t *tp = NULL;
+ if (import->do_all) {
+ tp = tpool_create(1, 5 * sysconf(_SC_NPROCESSORS_ONLN),
+ 0, NULL);
+ }
/*
* At this point we have a list of import candidate configs. Even if
verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
&pool_state) == 0);
- if (!do_destroyed && pool_state == POOL_STATE_DESTROYED)
+ if (!import->do_destroyed &&
+ pool_state == POOL_STATE_DESTROYED)
continue;
- if (do_destroyed && pool_state != POOL_STATE_DESTROYED)
+ if (import->do_destroyed &&
+ pool_state != POOL_STATE_DESTROYED)
continue;
verify(nvlist_add_nvlist(config, ZPOOL_LOAD_POLICY,
if (!pool_specified) {
if (first)
first = B_FALSE;
- else if (!do_all)
+ else if (!import->do_all)
(void) fputc('\n', stdout);
- if (do_all) {
- err |= do_import(config, NULL, mntopts,
- props, flags);
+ if (import->do_all) {
+ import_parameters_t *ip = safe_malloc(
+ sizeof (import_parameters_t));
+
+ ip->ip_config = config;
+ ip->ip_mntopts = mntopts;
+ ip->ip_props = props;
+ ip->ip_flags = flags;
+ ip->ip_err = &err;
+
+ (void) tpool_dispatch(tp, do_import_task,
+ (void *)ip);
} else {
/*
* If we're importing from cachefile, then
found_config = config;
}
}
+ if (import->do_all) {
+ tpool_wait(tp);
+ tpool_destroy(tp);
+ }
/*
* If we were searching for a specific pool, verify that we found a
boolean_t xtreme_rewind = B_FALSE;
boolean_t do_scan = B_FALSE;
boolean_t pool_exists = B_FALSE;
- boolean_t pool_specified = B_FALSE;
uint64_t txg = -1ULL;
char *cachefile = NULL;
importargs_t idata = { 0 };
searchname = argv[0];
searchguid = 0;
}
- pool_specified = B_TRUE;
/*
* User specified a name or guid. Ensure it's unique.
idata.cachefile = cachefile;
idata.scan = do_scan;
idata.policy = policy;
+ idata.do_destroyed = do_destroyed;
+ idata.do_all = do_all;
libpc_handle_t lpch = {
.lpc_lib_handle = g_zfs,
}
err = import_pools(pools, props, mntopts, flags,
- argc >= 1 ? argv[0] : NULL,
- argc >= 2 ? argv[1] : NULL,
- do_destroyed, pool_specified, do_all, &idata);
+ argc >= 1 ? argv[0] : NULL, argc >= 2 ? argv[1] : NULL, &idata);
/*
* If we're using the cachefile and we failed to import, then
pools = zpool_search_import(&lpch, &idata);
err = import_pools(pools, props, mntopts, flags,
- argc >= 1 ? argv[0] : NULL,
- argc >= 2 ? argv[1] : NULL,
- do_destroyed, pool_specified, do_all, &idata);
+ argc >= 1 ? argv[0] : NULL, argc >= 2 ? argv[1] : NULL,
+ &idata);
}
error:
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2018, 2024 by Delphix. All rights reserved.
*/
#ifndef _LIBZUTIL_H
boolean_t can_be_active; /* can the pool be active? */
boolean_t scan; /* prefer scanning to libblkid cache */
nvlist_t *policy; /* load policy (max txg, rewind, etc.) */
+ boolean_t do_destroyed;
+ boolean_t do_all;
} importargs_t;
typedef struct libpc_handle {
/* spa namespace global mutex */
extern kmutex_t spa_namespace_lock;
+extern avl_tree_t spa_namespace_avl;
+extern kcondvar_t spa_namespace_cv;
/*
* SPA configuration functions in spa_config.c
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2024 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved.
dsl_pool_t *spa_dsl_pool;
boolean_t spa_is_initializing; /* true while opening pool */
boolean_t spa_is_exporting; /* true while exporting pool */
+ kthread_t *spa_load_thread; /* loading, no namespace lock */
metaslab_class_t *spa_normal_class; /* normal data class */
metaslab_class_t *spa_log_class; /* intent log data class */
metaslab_class_t *spa_embedded_log_class; /* log on normal vdevs */
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2024 by Delphix. All rights reserved.
* Copyright 2016 RackTop Systems.
* Copyright (c) 2017, Intel Corporation.
*/
ZINJECT_PANIC,
ZINJECT_DELAY_IO,
ZINJECT_DECRYPT_FAULT,
+ ZINJECT_DELAY_IMPORT,
+ ZINJECT_DELAY_EXPORT,
} zinject_type_t;
typedef struct zfs_share {
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2024 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright 2016 Toomas Soome <tsoome@me.com>
extern int zio_handle_label_injection(zio_t *zio, int error);
extern void zio_handle_ignored_writes(zio_t *zio);
extern hrtime_t zio_handle_io_delay(zio_t *zio);
+extern void zio_handle_import_delay(spa_t *spa, hrtime_t elapsed);
+extern void zio_handle_export_delay(spa_t *spa, hrtime_t elapsed);
/*
* Checksum ereport functions
.
.It Xo
.Nm zinject
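+.Fl P Cm import Ns | Ns Cm export
+.Fl s Ar seconds
+.Ar pool
+.Xc
+Add an artificial delay to a future import or export of a pool, such that
+the operation takes a minimum of the supplied number of seconds to complete.
+This injector is automatically cleared after the import or export is finished.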
+.
+.It Xo
+.Nm zinject
.Fl I
.Op Fl s Ar seconds Ns | Ns Fl g Ar txgs
.Ar pool
{
ASSERT(spa_writeable(spa));
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
-
spa_start_raidz_expansion_thread(spa);
spa_start_indirect_condensing_thread(spa);
spa_start_livelist_destroy_thread(spa);
int error = 0;
ASSERT0(spa->spa_checkpoint_txg);
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
+ spa->spa_load_thread == curthread);
error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t),
boolean_t checkpoint_rewind =
(spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT);
boolean_t update_config_cache = B_FALSE;
+ hrtime_t load_start = gethrtime();
ASSERT(MUTEX_HELD(&spa_namespace_lock));
ASSERT(spa->spa_config_source != SPA_CONFIG_SRC_NONE);
return (error);
}
+ /*
+ * Drop the namespace lock for the rest of the function.
+ */
+ spa->spa_load_thread = curthread;
+ mutex_exit(&spa_namespace_lock);
+
/*
* Retrieve the checkpoint txg if the pool has a checkpoint.
*/
spa_import_progress_set_notes(spa, "Loading checkpoint txg");
error = spa_ld_read_checkpoint_txg(spa);
if (error != 0)
- return (error);
+ goto fail;
/*
* Retrieve the mapping of indirect vdevs. Those vdevs were removed
spa_import_progress_set_notes(spa, "Loading indirect vdev metadata");
error = spa_ld_open_indirect_vdev_metadata(spa);
if (error != 0)
- return (error);
+ goto fail;
/*
* Retrieve the full list of active features from the MOS and check if
spa_import_progress_set_notes(spa, "Checking feature flags");
error = spa_ld_check_features(spa, &missing_feat_write);
if (error != 0)
- return (error);
+ goto fail;
/*
* Load several special directories from the MOS needed by the dsl_pool
spa_import_progress_set_notes(spa, "Loading special MOS directories");
error = spa_ld_load_special_directories(spa);
if (error != 0)
- return (error);
+ goto fail;
/*
* Retrieve pool properties from the MOS.
spa_import_progress_set_notes(spa, "Loading properties");
error = spa_ld_get_props(spa);
if (error != 0)
- return (error);
+ goto fail;
/*
* Retrieve the list of auxiliary devices - cache devices and spares -
spa_import_progress_set_notes(spa, "Loading AUX vdevs");
error = spa_ld_open_aux_vdevs(spa, type);
if (error != 0)
- return (error);
+ goto fail;
/*
* Load the metadata for all vdevs. Also check if unopenable devices
spa_import_progress_set_notes(spa, "Loading vdev metadata");
error = spa_ld_load_vdev_metadata(spa);
if (error != 0)
- return (error);
+ goto fail;
spa_import_progress_set_notes(spa, "Loading dedup tables");
error = spa_ld_load_dedup_tables(spa);
if (error != 0)
- return (error);
+ goto fail;
spa_import_progress_set_notes(spa, "Loading BRT");
error = spa_ld_load_brt(spa);
if (error != 0)
- return (error);
+ goto fail;
/*
* Verify the logs now to make sure we don't have any unexpected errors
spa_import_progress_set_notes(spa, "Verifying Log Devices");
error = spa_ld_verify_logs(spa, type, ereport);
if (error != 0)
- return (error);
+ goto fail;
if (missing_feat_write) {
ASSERT(spa->spa_load_state == SPA_LOAD_TRYIMPORT);
* read-only mode but not read-write mode. We now have enough
* information and can return to userland.
*/
- return (spa_vdev_err(spa->spa_root_vdev, VDEV_AUX_UNSUP_FEAT,
- ENOTSUP));
+ error = spa_vdev_err(spa->spa_root_vdev, VDEV_AUX_UNSUP_FEAT,
+ ENOTSUP);
+ goto fail;
}
/*
spa_import_progress_set_notes(spa, "Verifying pool data");
error = spa_ld_verify_pool_data(spa);
if (error != 0)
- return (error);
+ goto fail;
/*
* Calculate the deflated space for the pool. This must be done before
spa_config_exit(spa, SCL_CONFIG, FTAG);
spa_import_progress_set_notes(spa, "Finished importing");
}
+ zio_handle_import_delay(spa, gethrtime() - load_start);
spa_import_progress_remove(spa_guid(spa));
spa_async_request(spa, SPA_ASYNC_L2CACHE_REBUILD);
spa_load_note(spa, "LOADED");
+fail:
+ mutex_enter(&spa_namespace_lock);
+ spa->spa_load_thread = NULL;
+ cv_broadcast(&spa_namespace_cv);
+
+ return (error);
- return (0);
}
static int
/*
* Create and initialize the spa structure.
*/
+ char *name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ (void) snprintf(name, MAXPATHLEN, "%s-%llx-%s",
+ TRYIMPORT_NAME, (u_longlong_t)curthread, poolname);
+
mutex_enter(&spa_namespace_lock);
- spa = spa_add(TRYIMPORT_NAME, tryconfig, NULL);
+ spa = spa_add(name, tryconfig, NULL);
spa_activate(spa, SPA_MODE_READ);
+ kmem_free(name, MAXPATHLEN);
/*
* Rewind pool if a max txg was provided.
{
int error;
spa_t *spa;
+ hrtime_t export_start = gethrtime();
if (oldconfig)
*oldconfig = NULL;
spa->spa_is_exporting = B_FALSE;
}
+ if (new_state == POOL_STATE_EXPORTED)
+ zio_handle_export_delay(spa, gethrtime() - export_start);
+
mutex_exit(&spa_namespace_lock);
return (0);
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2024 by Delphix. All rights reserved.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved.
* - Check if spa_refcount is zero
* - Rename a spa_t
* - add/remove/attach/detach devices
- * - Held for the duration of create/destroy/import/export
+ * - Held for the duration of create/destroy/export
+ * - Held at the start and end of import
*
* It does not need to handle recursion. A create or destroy may
* reference objects (files or zvols) in other pools, but by
* locking is, always, based on spa_namespace_lock and spa_config_lock[].
*/
-static avl_tree_t spa_namespace_avl;
+avl_tree_t spa_namespace_avl;
kmutex_t spa_namespace_lock;
-static kcondvar_t spa_namespace_cv;
+kcondvar_t spa_namespace_cv;
static const int spa_max_replication_override = SPA_DVAS_PER_BP;
static kmutex_t spa_spare_lock;
ASSERT(MUTEX_HELD(&spa_namespace_lock));
+retry:
(void) strlcpy(search.spa_name, name, sizeof (search.spa_name));
/*
*cp = '\0';
spa = avl_find(&spa_namespace_avl, &search, &where);
+ if (spa == NULL)
+ return (NULL);
+
+ if (spa->spa_load_thread != NULL &&
+ spa->spa_load_thread != curthread) {
+ cv_wait(&spa_namespace_cv, &spa_namespace_lock);
+ goto retry;
+ }
return (spa);
}
spa_config_lock_init(spa);
spa_stats_init(spa);
+ ASSERT(MUTEX_HELD(&spa_namespace_lock));
avl_add(&spa_namespace_avl, spa);
/*
nvlist_free(spa->spa_config_splitting);
avl_remove(&spa_namespace_avl, spa);
- cv_broadcast(&spa_namespace_cv);
if (spa->spa_root)
spa_strfree(spa->spa_root);
spa_open_ref(spa_t *spa, const void *tag)
{
ASSERT(zfs_refcount_count(&spa->spa_refcount) >= spa->spa_minref ||
- MUTEX_HELD(&spa_namespace_lock));
+ MUTEX_HELD(&spa_namespace_lock) ||
+ spa->spa_load_thread == curthread);
(void) zfs_refcount_add(&spa->spa_refcount, tag);
}
spa_close(spa_t *spa, const void *tag)
{
ASSERT(zfs_refcount_count(&spa->spa_refcount) > spa->spa_minref ||
- MUTEX_HELD(&spa_namespace_lock));
+ MUTEX_HELD(&spa_namespace_lock) ||
+ spa->spa_load_thread == curthread);
(void) zfs_refcount_remove(&spa->spa_refcount, tag);
}
*/
/*
- * Copyright (c) 2016, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2016, 2024 by Delphix. All rights reserved.
*/
#include <sys/spa.h>
void
vdev_initialize_restart(vdev_t *vd)
{
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
+ vd->vdev_spa->spa_load_thread == curthread);
ASSERT(!spa_config_held(vd->vdev_spa, SCL_ALL, RW_WRITER));
if (vd->vdev_leaf_zap != 0) {
* Copyright (c) 2018, Intel Corporation.
* Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
* Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
+ * Copyright (c) 2024 by Delphix. All rights reserved.
*/
#include <sys/vdev_impl.h>
void
vdev_rebuild_restart(spa_t *spa)
{
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
+ spa->spa_load_thread == curthread);
vdev_rebuild_restart_impl(spa->spa_root_vdev);
}
*/
/*
- * Copyright (c) 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2016, 2024 by Delphix. All rights reserved.
* Copyright (c) 2019 by Lawrence Livermore National Security, LLC.
* Copyright (c) 2021 Hewlett Packard Enterprise Development LP
* Copyright 2023 RackTop Systems, Inc.
void
vdev_trim_restart(vdev_t *vd)
{
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
+ vd->vdev_spa->spa_load_thread == curthread);
ASSERT(!spa_config_held(vd->vdev_spa, SCL_ALL, RW_WRITER));
if (vd->vdev_leaf_zap != 0) {
void
vdev_autotrim_restart(spa_t *spa)
{
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
-
+ ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
+ spa->spa_load_thread == curthread);
if (spa->spa_autotrim)
vdev_autotrim(spa);
}
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2024, Klara Inc.
*/
/*
typedef struct inject_handler {
int zi_id;
spa_t *zi_spa;
+ char *zi_spa_name; /* ZINJECT_DELAY_IMPORT only */
zinject_record_t zi_record;
uint64_t *zi_lanes;
int zi_next_lane;
return (min_target);
}
+/*
+ * Apply a pending import or export delay to 'spa'.
+ *
+ * Looks for a handler of type 'command' whose pool name matches the spa.
+ * If one exists and its configured duration is longer than 'elapsed'
+ * (nanoseconds the operation has already taken), sleep for the remainder.
+ * A matching handler is one-shot: it is cleared afterwards whether or not
+ * any sleeping was needed.
+ */
+static void
+zio_handle_pool_delay(spa_t *spa, hrtime_t elapsed, zinject_type_t command)
+{
+	inject_handler_t *handler;
+	hrtime_t delay = 0;
+	int id = 0;
+
+	rw_enter(&inject_lock, RW_READER);
+
+	for (handler = list_head(&inject_handlers); handler != NULL;
+	    handler = list_next(&inject_handlers, handler)) {
+		/*
+		 * inject_handlers also holds handlers of other types (see
+		 * the command filter in zio_pool_handler_exists()), so skip
+		 * them instead of ending the walk at the first mismatch.
+		 */
+		if (handler->zi_record.zi_cmd != command)
+			continue;
+
+		ASSERT3P(handler->zi_spa_name, !=, NULL);
+		if (strcmp(spa_name(spa), handler->zi_spa_name) == 0) {
+			uint64_t pause =
+			    SEC2NSEC(handler->zi_record.zi_duration);
+			if (pause > elapsed) {
+				delay = pause - elapsed;
+			}
+			id = handler->zi_id;
+			break;
+		}
+	}
+
+	rw_exit(&inject_lock);
+
+	if (delay) {
+		if (command == ZINJECT_DELAY_IMPORT) {
+			spa_import_progress_set_notes(spa, "injecting %llu "
+			    "sec delay", (u_longlong_t)NSEC2SEC(delay));
+		}
+		zfs_sleep_until(gethrtime() + delay);
+	}
+	if (id) {
+		/* all done with this one-shot handler */
+		zio_clear_fault(id);
+	}
+}
+
+/*
+ * For testing, inject a delay during an import.
+ *
+ * 'elapsed' is the time the import has taken so far.  If a
+ * ZINJECT_DELAY_IMPORT handler is registered under this pool's name,
+ * zio_handle_pool_delay() sleeps for whatever part of the handler's
+ * duration exceeds 'elapsed' and then clears the handler.
+ */
+void
+zio_handle_import_delay(spa_t *spa, hrtime_t elapsed)
+{
+ zio_handle_pool_delay(spa, elapsed, ZINJECT_DELAY_IMPORT);
+}
+
+/*
+ * For testing, inject a delay during an export.
+ *
+ * 'elapsed' is the time the export has taken so far.  If a
+ * ZINJECT_DELAY_EXPORT handler is registered under this pool's name,
+ * zio_handle_pool_delay() sleeps for whatever part of the handler's
+ * duration exceeds 'elapsed' and then clears the handler.
+ */
+void
+zio_handle_export_delay(spa_t *spa, hrtime_t elapsed)
+{
+ zio_handle_pool_delay(spa, elapsed, ZINJECT_DELAY_EXPORT);
+}
+
static int
zio_calculate_range(const char *pool, zinject_record_t *record)
{
return (0);
}
+/*
+ * Report whether a handler of type 'command' is already registered for
+ * the pool called 'name'.  A handler's pool name is its saved
+ * zi_spa_name when that is set, otherwise the name of its zi_spa.
+ */
+static boolean_t
+zio_pool_handler_exists(const char *name, zinject_type_t command)
+{
+	inject_handler_t *ih;
+
+	rw_enter(&inject_lock, RW_READER);
+	ih = list_head(&inject_handlers);
+	while (ih != NULL) {
+		if (ih->zi_record.zi_cmd == command) {
+			const char *owner;
+
+			if (ih->zi_spa_name != NULL)
+				owner = ih->zi_spa_name;
+			else
+				owner = spa_name(ih->zi_spa);
+
+			if (strcmp(name, owner) == 0)
+				break;
+		}
+		ih = list_next(&inject_handlers, ih);
+	}
+	rw_exit(&inject_lock);
+
+	/* the walk only stops early on a match */
+	return (ih != NULL ? B_TRUE : B_FALSE);
+}
/*
* Create a new handler for the given record. We add it to the list, adding
* a reference to the spa_t in the process. We increment zio_injection_enabled,
if (!(flags & ZINJECT_NULL)) {
/*
- * spa_inject_ref() will add an injection reference, which will
- * prevent the pool from being removed from the namespace while
- * still allowing it to be unloaded.
+ * Pool delays for import or export don't take an
+ * injection reference on the spa. Instead they
+ * rely on matching by name.
*/
- if ((spa = spa_inject_addref(name)) == NULL)
- return (SET_ERROR(ENOENT));
+ if (record->zi_cmd == ZINJECT_DELAY_IMPORT ||
+ record->zi_cmd == ZINJECT_DELAY_EXPORT) {
+ if (record->zi_duration <= 0)
+ return (SET_ERROR(EINVAL));
+ /*
+ * Only one import | export delay handler per pool.
+ */
+ if (zio_pool_handler_exists(name, record->zi_cmd))
+ return (SET_ERROR(EEXIST));
+
+ mutex_enter(&spa_namespace_lock);
+ boolean_t has_spa = spa_lookup(name) != NULL;
+ mutex_exit(&spa_namespace_lock);
+
+ if (record->zi_cmd == ZINJECT_DELAY_IMPORT && has_spa)
+ return (SET_ERROR(EEXIST));
+ if (record->zi_cmd == ZINJECT_DELAY_EXPORT && !has_spa)
+ return (SET_ERROR(ENOENT));
+ spa = NULL;
+ } else {
+ /*
+ * spa_inject_ref() will add an injection reference,
+ * which will prevent the pool from being removed
+ * from the namespace while still allowing it to be
+ * unloaded.
+ */
+ if ((spa = spa_inject_addref(name)) == NULL)
+ return (SET_ERROR(ENOENT));
+ }
handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP);
-
- handler->zi_spa = spa;
+ handler->zi_spa = spa; /* note: can be NULL */
handler->zi_record = *record;
if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) {
handler->zi_next_lane = 0;
}
+ if (handler->zi_spa == NULL)
+ handler->zi_spa_name = spa_strdup(name);
+ else
+ handler->zi_spa_name = NULL;
+
rw_enter(&inject_lock, RW_WRITER);
/*
if (handler) {
*record = handler->zi_record;
*id = handler->zi_id;
- (void) strlcpy(name, spa_name(handler->zi_spa), buflen);
+ ASSERT(handler->zi_spa || handler->zi_spa_name);
+ if (handler->zi_spa != NULL)
+ (void) strlcpy(name, spa_name(handler->zi_spa), buflen);
+ else
+ (void) strlcpy(name, handler->zi_spa_name, buflen);
ret = 0;
} else {
ret = SET_ERROR(ENOENT);
ASSERT3P(handler->zi_lanes, ==, NULL);
}
- spa_inject_delref(handler->zi_spa);
+ if (handler->zi_spa_name != NULL)
+ spa_strfree(handler->zi_spa_name);
+
+ if (handler->zi_spa != NULL)
+ spa_inject_delref(handler->zi_spa);
kmem_free(handler, sizeof (inject_handler_t));
atomic_dec_32(&zio_injection_enabled);
'import_paths_changed',
'import_rewind_config_changed',
'import_rewind_device_replaced',
- 'zpool_import_status']
+ 'zpool_import_status', 'zpool_import_parallel_pos',
+ 'zpool_import_parallel_neg', 'zpool_import_parallel_admin']
tags = ['functional', 'cli_root', 'zpool_import']
timeout = 1200
functional/cli_root/zpool_import/zpool_import_missing_003_pos.ksh \
functional/cli_root/zpool_import/zpool_import_rename_001_pos.ksh \
functional/cli_root/zpool_import/zpool_import_status.ksh \
+ functional/cli_root/zpool_import/zpool_import_parallel_admin.ksh \
+ functional/cli_root/zpool_import/zpool_import_parallel_neg.ksh \
+ functional/cli_root/zpool_import/zpool_import_parallel_pos.ksh \
functional/cli_root/zpool_initialize/cleanup.ksh \
functional/cli_root/zpool_initialize/zpool_initialize_attach_detach_add_remove.ksh \
functional/cli_root/zpool_initialize/zpool_initialize_fault_export_import_online.ksh \
--- /dev/null
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2023 Klara, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg
+. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib
+
+#
+# DESCRIPTION:
+# Verify that admin commands to a different pool are not blocked by import
+#
+# STRATEGY:
+# 1. Create 2 pools
+# 2. Export one of the pools
+# 3. Import the pool with an injected delay
+# 4. Execute some admin commands against both pools
+# 5. Verify that the admin commands to the pool that is not being imported
+#    don't stall
+#
+
+verify_runnable "global"
+
+#
+# Test teardown, registered with log_onexit: drop leftover injection
+# handlers and destroy both pools used by this test.
+#
+function cleanup
+{
+ # presumably cancels every registered injection handler, including an
+ # import delay that was never consumed
+ zinject -c all
+ destroy_pool $TESTPOOL1
+ destroy_pool $TESTPOOL2
+}
+
+#
+# Import pool $2, searching for its devices in directory $1, and report
+# how many seconds that took.  On success the elapsed time is also written
+# to ${DEVICE_DIR}/<pool>-import.
+#
+function pool_import
+{
+	typeset searchdir=$1
+	typeset name=$2
+
+	# restart the timer so SECONDS measures only this command
+	SECONDS=0
+	if errmsg=$(zpool import -d $searchdir -f $name 2>&1 > /dev/null); then
+		echo ${name}: imported in $SECONDS secs
+		echo $SECONDS > ${DEVICE_DIR}/${name}-import
+	else
+		echo ${name}: import failed $errmsg in $SECONDS secs
+	fi
+}
+
+#
+# Add device $2 as a vdev of type $3 to pool $1 and report how many
+# seconds that took.  On success the elapsed time is also written to
+# ${DEVICE_DIR}/<pool>-add.
+#
+function pool_add_device
+{
+	typeset name=$1
+	typeset vdev=$2
+	typeset kind=$3
+
+	# restart the timer so SECONDS measures only this command
+	SECONDS=0
+	if errmsg=$(zpool add $name $kind $vdev 2>&1 > /dev/null); then
+		echo ${name}: added $kind vdev in $SECONDS secs
+		echo $SECONDS > ${DEVICE_DIR}/${name}-add
+	else
+		echo ${name}: add $kind vdev failed ${errmsg}, in $SECONDS secs
+	fi
+}
+
+#
+# Run the zpool subcommand $1 (e.g. "status" or "list") against pool $2
+# and report how many seconds that took.  On success the elapsed time is
+# also written to ${DEVICE_DIR}/<pool>-<subcommand>.
+#
+function pool_stats
+{
+	typeset subcmd=$1
+	typeset name=$2
+
+	# restart the timer so SECONDS measures only this command
+	SECONDS=0
+	if errmsg=$(zpool $subcmd $name 2>&1 > /dev/null); then
+		echo ${name}: $subcmd in $SECONDS secs
+		echo $SECONDS > ${DEVICE_DIR}/${name}-${subcmd}
+	else
+		echo ${name}: $subcmd failed ${errmsg}, in $SECONDS secs
+	fi
+}
+
+#
+# Create pool $1 on device $2 and report how many seconds that took.  On
+# success the elapsed time is also written to ${DEVICE_DIR}/<pool>-create.
+#
+function pool_create
+{
+	typeset name=$1
+	typeset vdev=$2
+
+	# restart the timer so SECONDS measures only this command
+	SECONDS=0
+	if errmsg=$(zpool create $name $vdev 2>&1 > /dev/null); then
+		echo ${name}: created in $SECONDS secs
+		echo $SECONDS > ${DEVICE_DIR}/${name}-create
+	else
+		echo ${name}: create failed ${errmsg}, in $SECONDS secs
+	fi
+}
+
+log_assert "Simple admin commands to different pool not blocked by import"
+
+log_onexit cleanup
+
+#
+# create two pools and export one
+#
+log_must zpool create $TESTPOOL1 $VDEV0
+log_must zpool export $TESTPOOL1
+log_must zpool create $TESTPOOL2 $VDEV1
+
+#
+# import pool asynchronously with an injected 10 second delay
+#
+log_must zinject -P import -s 10 $TESTPOOL1
+pool_import $DEVICE_DIR $TESTPOOL1 &
+
+sleep 2
+
+#
+# run some admin commands on the pools while the import is in progress
+#
+
+pool_add_device $TESTPOOL1 $VDEV2 "log" &
+pool_add_device $TESTPOOL2 $VDEV3 "cache" &
+pool_stats "status" $TESTPOOL1 &
+pool_stats "status" $TESTPOOL2 &
+pool_stats "list" $TESTPOOL1 &
+pool_stats "list" $TESTPOOL2 &
+pool_create $TESTPOOL1 $VDEV4 &
+wait
+
+log_must zpool sync $TESTPOOL1 $TESTPOOL2
+
+zpool history $TESTPOOL1
+zpool history $TESTPOOL2
+
+log_must test "5" -lt $(<${DEVICE_DIR}/${TESTPOOL1}-import)
+
+#
+# verify that commands to second pool did not wait for import to finish
+#
+log_must test "2" -gt $(<${DEVICE_DIR}/${TESTPOOL2}-status)
+log_must test "2" -gt $(<${DEVICE_DIR}/${TESTPOOL2}-list)
+log_must test "2" -gt $(<${DEVICE_DIR}/${TESTPOOL2}-add)
+[[ -e ${DEVICE_DIR}/${TESTPOOL1}-create ]] && log_fail "unexpected pool create"
+
+log_pass "Simple admin commands to different pool not blocked by import"
--- /dev/null
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2023 Klara, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg
+. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib
+
+#
+# DESCRIPTION:
+# Verify that pool imports by same name only have one winner
+#
+# STRATEGY:
+# 1. Create 4 single disk pools with the same name
+# 2. Generate some ZIL records (for a longer import)
+# 3. Export the pools
+# 4. Import the pools in parallel
+# 5. Repeat with using matching guids
+#
+
+verify_runnable "global"
+
+#
+# All four pools share one name and are told apart by device directory:
+# pool <n> lives in "${DEV_DIR_PREFIX}<n>" on a single 512 MiB file vdev.
+#
+POOLNAME="import_pool"
+DEV_DIR_PREFIX="${DEVICE_DIR}/${POOLNAME}"
+VDEVSIZE=$((512 * 1024 * 1024))
+
+log_assert "parallel pool imports by same name only have one winner"
+
+# give every pool a private directory holding its backing file
+for i in {0..3}; do
+	pool_dir=${DEV_DIR_PREFIX}$i
+	log_must mkdir -p $pool_dir
+	log_must truncate -s $VDEVSIZE $pool_dir/${DEVICE_FILE}$i
+done
+
+#
+# Undo what the test set up: clear the injection handlers, put both
+# tunables back to 0, destroy the pool and remove the per-pool device
+# directories.
+#
+function cleanup
+{
+	typeset tunable
+
+	# best effort: not wrapped in log_must
+	zinject -c all
+
+	for tunable in KEEP_LOG_SPACEMAPS_AT_EXPORT METASLAB_DEBUG_LOAD; do
+		log_must set_tunable64 $tunable 0
+	done
+
+	destroy_pool $POOLNAME
+
+	# the glob matches every "${DEV_DIR_PREFIX}<n>" directory
+	log_must rm -rf $DEV_DIR_PREFIX*
+}
+
+# register cleanup with the test harness so it runs when the test exits
+log_onexit cleanup
+
+# Both tunables are switched on for the duration of the test; cleanup
+# sets them back to 0.
+# NOTE(review): presumably these make a pool import do more work --
+# confirm against the module parameter documentation.
+log_must set_tunable64 KEEP_LOG_SPACEMAPS_AT_EXPORT 1
+log_must set_tunable64 METASLAB_DEBUG_LOAD 1
+
+#
+# Try to import a pool and record the outcome as a marker file.
+# Intended to be run as a background job, one per candidate pool.
+#
+# $1 - device directory to search; also receives the marker file
+# $2 - name of the pool to import
+# $3 - optional new name for the imported pool
+#
+# Outcome:
+#	$1/imported	the import succeeded
+#	$1/failed	zpool reported "cannot import"
+#	(no marker)	any other failure; it is reported on stdout so the
+#			caller's tally mismatch can be diagnosed
+#
+function import_pool
+{
+	typeset dir=$1
+	typeset pool=$2
+	typeset newname=$3
+	typeset errmsg
+	typeset -i rc
+
+	SECONDS=0
+	# capture stderr only; stdout is discarded
+	errmsg=$(zpool import -N -d "$dir" -f "$pool" \
+	    ${newname:+"$newname"} 2>&1 > /dev/null)
+	rc=$?
+	if ((rc == 0)); then
+		touch "$dir/imported"
+		echo "imported $pool in $SECONDS secs"
+	elif [[ $errmsg == *"cannot import"* ]]; then
+		echo "pool import failed: $errmsg, waited $SECONDS secs"
+		touch "$dir/failed"
+	else
+		# neither a success nor the expected refusal: do not count
+		# it, but do not hide it either
+		echo "unexpected import failure ($rc) for $pool: $errmsg"
+	fi
+}
+
+#
+# create four exported pools with the same name
+#
+# Each pool is exported right after it is created, which frees the name
+# for the next iteration; only the device directory tells them apart.
+#
+for i in {0..3}; do
+ log_must zpool create $POOLNAME ${DEV_DIR_PREFIX}$i/${DEVICE_FILE}$i
+ log_must zpool export $POOLNAME
+done
+# make a future import of $POOLNAME take at least 10 seconds, so the
+# competing imports below have time to overlap
+log_must zinject -P import -s 10 $POOLNAME
+
+#
+# import the pools in parallel, expecting only one winner
+#
+# Every import_pool call runs as a background job and leaves its result
+# as a marker file in its own device directory; no new name is passed.
+#
+for i in {0..3}; do
+ import_pool ${DEV_DIR_PREFIX}$i $POOLNAME &
+done
+# block until all four background imports have finished
+wait
+
+#
+# check the result of background imports
+#
+# Each background job left either an "imported" or a "failed" marker in
+# its device directory; exactly one import may have won.
+#
+typeset -i num_imports=0
+typeset -i num_cannot=0
+for i in {0..3}; do
+	if [[ -f ${DEV_DIR_PREFIX}$i/imported ]]; then
+		((num_imports += 1))
+	fi
+	if [[ -f ${DEV_DIR_PREFIX}$i/failed ]]; then
+		((num_cannot += 1))
+	fi
+done
+((num_imports == 1)) || \
+	log_fail "expecting 1 import, found $num_imports"
+((num_cannot == 3)) || \
+	log_fail "expecting 3 pool exists errors, found $num_cannot"
+
+log_note "$num_imports imported and $num_cannot failed (expected)"
+
+log_pass "parallel pool imports by same name only have one winner"
--- /dev/null
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2023 Klara, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg
+. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib
+
+# test uses 8 vdevs
+# NOTE(review): set after the .cfg/.kshlib files are sourced, so this
+# overrides any earlier value; presumably read by increase_device_sizes --
+# confirm in zpool_import.kshlib.
+export MAX_NUM=8
+
+#
+# DESCRIPTION:
+#	Verify that pool imports can occur in parallel
+#
+# STRATEGY:
+#	1. Create 8 pools
+#	2. Export the pools
+#	3. Inject an import delay for each pool (for a longer import)
+#	4. Import half of the pools synchronously to baseline sequential cost
+#	5. Import the other half asynchronously to demonstrate parallel savings
+#	6. Export 4 pools and inject their import delays again
+#	7. Test zpool import -a
+#
+
+verify_runnable "global"
+
+#
+# override the minimum sized vdevs
+#
+# Each device file is resized to 512 MiB; cleanup resets the devices to
+# $FILE_SIZE afterwards.
+#
+VDEVSIZE=$((512 * 1024 * 1024))
+increase_device_sizes $VDEVSIZE
+
+# pools are named "$POOLNAME-<index>"
+POOLNAME="import_pool"
+
+#
+# Undo what the test set up: clear the injection handlers, put both
+# tunables back to 0, destroy every test pool and reset the device files.
+#
+function cleanup
+{
+	typeset tunable
+	typeset -i last=$(($MAX_NUM - 1))
+
+	# best effort: not wrapped in log_must
+	zinject -c all
+
+	for tunable in KEEP_LOG_SPACEMAPS_AT_EXPORT METASLAB_DEBUG_LOAD; do
+		log_must set_tunable64 $tunable 0
+	done
+
+	for i in {0..$last}; do
+		destroy_pool $POOLNAME-$i
+	done
+
+	# reset the devices: first to size 0, then to $FILE_SIZE
+	increase_device_sizes 0
+	increase_device_sizes $FILE_SIZE
+}
+
+log_assert "Pool imports can occur in parallel"
+
+# register cleanup with the test harness before changing any tunable
+log_onexit cleanup
+
+# switch both tunables on for the test; cleanup sets them back to 0
+for tunable in KEEP_LOG_SPACEMAPS_AT_EXPORT METASLAB_DEBUG_LOAD; do
+	log_must set_tunable64 $tunable 1
+done
+
+
+#
+# create some exported pools with import delay injectors
+#
+# Each pool is created, exported, and then given a handler that makes its
+# next import take at least 12 seconds.  Every command runs in the
+# foreground, so there is nothing to wait for afterwards.
+#
+for i in {0..$(($MAX_NUM - 1))}; do
+	log_must zpool create $POOLNAME-$i $DEVICE_DIR/${DEVICE_FILE}$i
+	log_must zpool export $POOLNAME-$i
+	log_must zinject -P import -s 12 $POOLNAME-$i
+done
+
+#
+# import half of the pools synchronously
+#
+# SECONDS is reset first, so its value after the loop is the elapsed time
+# of the four back-to-back imports.  This baseline is what the parallel
+# runs further down are compared against.
+#
+SECONDS=0
+for i in {0..3}; do
+ log_must zpool import -d $DEVICE_DIR -f $POOLNAME-$i
+done
+sequential_time=$SECONDS
+log_note "sequentially imported 4 pools in $sequential_time seconds"
+
+#
+# import half of the pools in parallel
+#
+# The four imports run as background jobs.  Each job is reaped by pid
+# because a bare 'wait' discards the exit statuses, which would let a
+# failed (and therefore fast) import go unnoticed by the timing check.
+#
+SECONDS=0
+import_pids=""
+for i in {4..7}; do
+	log_must zpool import -d $DEVICE_DIR -f $POOLNAME-$i &
+	import_pids="$import_pids $!"
+done
+for pid in $import_pids; do
+	wait $pid || log_fail "background import (pid $pid) failed"
+done
+parallel_time=$SECONDS
+log_note "asynchronously imported 4 pools in $parallel_time seconds"
+
+# running in parallel must take less than a third of the sequential time
+log_must test $parallel_time -lt $(($sequential_time / 3))
+
+#
+# export pools with import delay injectors
+#
+# Pools 4..7 are exported again and each gets a fresh handler that makes
+# its next import take at least 12 seconds.  Every command runs in the
+# foreground, so there is nothing to wait for afterwards.
+#
+for i in {4..7}; do
+	log_must zpool export $POOLNAME-$i
+	log_must zinject -P import -s 12 $POOLNAME-$i
+done
+
+#
+# now test zpool import -a
+#
+# A single 'zpool import -a' has to bring in the four exported pools; it
+# is held to the same bound as the explicit background imports.
+#
+SECONDS=0
+log_must zpool import -a -d $DEVICE_DIR -f
+parallel_time=$SECONDS
+log_note "asynchronously imported 4 pools in $parallel_time seconds"
+
+# must take less than a third of the sequential baseline
+log_must test $parallel_time -lt $(($sequential_time / 3))
+
+log_pass "Pool imports occur in parallel"