@@ -24,6 +24,28 @@ Available fault injection capabilities
injects futex deadlock and uaddr fault errors.
+- fail_kernfs_fop_write_iter
+
+ Allows for failures to be enabled inside kernfs_fop_write_iter(). Enabling
+ this does not immediately enable any errors to occur. You must configure
+ how you want this routine to fail or change behaviour by using the debugfs
+ knobs for it:
+
+ # ls -1 /sys/kernel/debug/kernfs/config_fail_kernfs_fop_write_iter/
+ wait_after_active
+ wait_after_mutex
+ wait_at_start
+ wait_before_mutex
+
+ You can also configure how long to sleep after a wait under
+
+ /sys/kernel/debug/kernfs/sleep_after_wait_ms
+
+ If you enable CONFIG_FAULT_INJECTION_DEBUG_FS the fail_add_disk failure
+ injection parameters are placed under:
+
+ /sys/kernel/debug/kernfs/fail_kernfs_fop_write_iter/
+
- fail_make_request
injects disk IO errors on devices permitted by setting
@@ -10246,7 +10246,7 @@ M: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
M: Tejun Heo <tj@kernel.org>
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git
-F: fs/kernfs/
+F: fs/kernfs/*
F: include/linux/kernfs.h
KEXEC
@@ -4,3 +4,4 @@
#
obj-y := mount.o inode.o dir.o file.o symlink.o
+obj-$(CONFIG_FAIL_KERNFS_KNOBS) += failure-injection.o
new file mode 100644
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/fault-inject.h>
+#include <linux/delay.h>
+
+#include "kernfs-internal.h"
+
+static DECLARE_FAULT_ATTR(fail_kernfs_fop_write_iter);
+struct kernfs_config_fail kernfs_config_fail;
+
+#define kernfs_config_fail(when) \
+ kernfs_config_fail.kernfs_fop_write_iter_fail.wait_ ## when
+
+#define kernfs_config_fail(when) \
+ kernfs_config_fail.kernfs_fop_write_iter_fail.wait_ ## when
+
+static int __init setup_fail_kernfs_fop_write_iter(char *str)
+{
+ return setup_fault_attr(&fail_kernfs_fop_write_iter, str);
+}
+
+__setup("fail_kernfs_fop_write_iter=", setup_fail_kernfs_fop_write_iter);
+
+struct dentry *kernfs_debugfs_root;
+struct dentry *config_fail_kernfs_fop_write_iter;
+
+static int __init kernfs_init_failure_injection(void)
+{
+ kernfs_config_fail.sleep_after_wait_ms = 100;
+ kernfs_debugfs_root = debugfs_create_dir("kernfs", NULL);
+
+ fault_create_debugfs_attr("fail_kernfs_fop_write_iter",
+ kernfs_debugfs_root, &fail_kernfs_fop_write_iter);
+
+ config_fail_kernfs_fop_write_iter =
+ debugfs_create_dir("config_fail_kernfs_fop_write_iter",
+ kernfs_debugfs_root);
+
+ debugfs_create_u32("sleep_after_wait_ms", 0600,
+ kernfs_debugfs_root,
+ &kernfs_config_fail.sleep_after_wait_ms);
+
+ debugfs_create_bool("wait_at_start", 0600,
+ config_fail_kernfs_fop_write_iter,
+ &kernfs_config_fail(at_start));
+ debugfs_create_bool("wait_before_mutex", 0600,
+ config_fail_kernfs_fop_write_iter,
+ &kernfs_config_fail(before_mutex));
+ debugfs_create_bool("wait_after_mutex", 0600,
+ config_fail_kernfs_fop_write_iter,
+ &kernfs_config_fail(after_mutex));
+ debugfs_create_bool("wait_after_active", 0600,
+ config_fail_kernfs_fop_write_iter,
+ &kernfs_config_fail(after_active));
+ return 0;
+}
+late_initcall(kernfs_init_failure_injection);
+
+int __kernfs_debug_should_wait_kernfs_fop_write_iter(bool evaluate)
+{
+ if (!evaluate)
+ return 0;
+
+ return should_fail(&fail_kernfs_fop_write_iter, 0);
+}
+
+DECLARE_COMPLETION(kernfs_debug_wait_completion);
+EXPORT_SYMBOL_NS_GPL(kernfs_debug_wait_completion, KERNFS_DEBUG_PRIVATE);
+
+void kernfs_debug_wait(void)
+{
+ wait_for_completion(&kernfs_debug_wait_completion);
+ pr_info("%s received completion\n", __func__);
+
+ /**
+ * The goal is wait for an event, and *then* once we have
+ * reached it, the other side will try to do something which
+ * it thinks will break. So we must give it some time to do
+ * that. The amount of time is configurable.
+ */
+ msleep(kernfs_config_fail.sleep_after_wait_ms);
+ pr_info("%s ended\n", __func__);
+}
@@ -259,6 +259,9 @@ static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter)
const struct kernfs_ops *ops;
char *buf;
+ if (kernfs_debug_should_wait(kernfs_fop_write_iter, at_start))
+ kernfs_debug_wait();
+
if (of->atomic_write_len) {
if (len > of->atomic_write_len)
return -E2BIG;
@@ -280,17 +283,27 @@ static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter)
}
buf[len] = '\0'; /* guarantee string termination */
+ if (kernfs_debug_should_wait(kernfs_fop_write_iter, before_mutex))
+ kernfs_debug_wait();
+
/*
* @of->mutex nests outside active ref and is used both to ensure that
* the ops aren't called concurrently for the same open file.
*/
mutex_lock(&of->mutex);
+
+ if (kernfs_debug_should_wait(kernfs_fop_write_iter, after_mutex))
+ kernfs_debug_wait();
+
if (!kernfs_get_active(of->kn)) {
mutex_unlock(&of->mutex);
len = -ENODEV;
goto out_free;
}
+ if (kernfs_debug_should_wait(kernfs_fop_write_iter, after_active))
+ kernfs_debug_wait();
+
ops = kernfs_ops(of->kn);
if (ops->write)
len = ops->write(of, buf, len, iocb->ki_pos);
@@ -17,6 +17,7 @@
#include <linux/kernfs.h>
#include <linux/fs_context.h>
+#include <linux/stringify.h>
struct kernfs_iattrs {
kuid_t ia_uid;
@@ -127,4 +128,75 @@ void kernfs_drain_open_files(struct kernfs_node *kn);
*/
extern const struct inode_operations kernfs_symlink_iops;
+/*
+ * failure-injection.c
+ */
+#ifdef CONFIG_FAIL_KERNFS_KNOBS
+
+/**
+ * struct kernfs_fop_write_iter_fail - how kernfs_fop_write_iter_fail fails
+ *
+ * This lets you configure what part of kernfs_fop_write_iter() should behave
+ * in a specific way to allow userspace to capture possible failures in
+ * kernfs. The wait knobs are allowed to let you design capture possible
+ * race conditions which would otherwise be difficult to reproduce. A
+ * secondary driver would tell kernfs's wait completion when it is done.
+ *
+ * The point to the wait completion failure injection tests are to confirm
+ * that the kernfs active refcount suffice to ensure other objects in other
+ * layers are also gauranteed to exist, even they are opaque to kernfs. This
+ * includes kobjects, devices, and other objects built on top of this, like
+ * the block layer when using sysfs block device attributes.
+ *
+ * @wait_at_start: waits for completion from a third party at the start of
+ * the routine.
+ * @wait_before_mutex: waits for completion from a third party before we
+ * are allowed to continue before the of->mutex is held.
+ * @wait_after_mutex: waits for completion from a third party after we
+ * have held the of->mutex.
+ * @wait_after_active: waits for completion from a thid party after we
+ * have refcounted the struct kernfs_node.
+ */
+struct kernfs_fop_write_iter_fail {
+ bool wait_at_start;
+ bool wait_before_mutex;
+ bool wait_after_mutex;
+ bool wait_after_active;
+};
+
+/**
+ * struct kernfs_config_fail - kernfs configuration for failure injection
+ *
+ * You can kernfs failure injection on boot, and in particular we currently
+ * only support failures for kernfs_fop_write_iter(). However, we don't
+ * want to always enable errors on this call when failure injection is enabled
+ * as this routine is used by many parts of the kernel for proper functionality.
+ * The compromise we make is we let userspace start enabling which parts it
+ * wants to fail after boot, if and only if failure injection has been enabled.
+ *
+ * @kernfs_fop_write_iter_fail: configuration for how we want to allow
+ * for failure injection on kernfs_fop_write_iter()
+ * @sleep_after_wait_ms: how many ms to wait after completion is received.
+ */
+struct kernfs_config_fail {
+ struct kernfs_fop_write_iter_fail kernfs_fop_write_iter_fail;
+ u32 sleep_after_wait_ms;
+};
+
+extern struct kernfs_config_fail kernfs_config_fail;
+
+#define __kernfs_config_wait_var(func, when) \
+ (kernfs_config_fail. func ## _fail.wait_ ## when)
+#define __kernfs_debug_should_wait_func_name(func) __kernfs_debug_should_wait_## func
+
+#define kernfs_debug_should_wait(func, when) \
+ __kernfs_debug_should_wait_func_name(func)(__kernfs_config_wait_var(func, when))
+int __kernfs_debug_should_wait_kernfs_fop_write_iter(bool evaluate);
+void kernfs_debug_wait(void);
+#else
+static inline void kernfs_init_failure_injection(void) {}
+#define kernfs_debug_should_wait(when) (false)
+static inline void kernfs_debug_wait(void) {}
+#endif
+
#endif /* __KERNFS_INTERNAL_H */
@@ -410,6 +410,11 @@ void kernfs_init(void);
struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
u64 id);
+
+#ifdef CONFIG_FAIL_KERNFS_KNOBS
+extern struct completion kernfs_debug_wait_completion;
+#endif
+
#else /* CONFIG_KERNFS */
static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn)
@@ -1930,6 +1930,16 @@ config FAULT_INJECTION_USERCOPY
Provides fault-injection capability to inject failures
in usercopy functions (copy_from_user(), get_user(), ...).
+config FAIL_KERNFS_KNOBS
+ bool "Fault-injection support in kernfs"
+ depends on FAULT_INJECTION
+ help
+ Provide fault-injection capability for kernfs. This only enables
+ the error injection functionality. To use it you must configure which
+ which path you want to trigger on error on using debugfs under
+ /sys/kernel/debug/kernfs/config_fail_kernfs_fop_write_iter/. By
+ default all of these are disabled.
+
config FAIL_MAKE_REQUEST
bool "Fault-injection capability for disk IO"
depends on FAULT_INJECTION && BLOCK