@@ -28,6 +28,7 @@ struct bsg_device {
unsigned int timeout;
unsigned int reserved_size;
bsg_sg_io_fn *sg_io_fn;
+ bool goner;
};
static inline struct bsg_device *to_bsg_device(struct inode *inode)
@@ -71,9 +72,14 @@ static int bsg_sg_io(struct bsg_device *bd, fmode_t mode, void __user *uarg)
static int bsg_open(struct inode *inode, struct file *file)
{
- if (!blk_get_queue(to_bsg_device(inode)->queue))
- return -ENXIO;
- return 0;
+ struct bsg_device *bd = to_bsg_device(inode);
+ int err = 0;
+
+ rcu_read_lock();
+ if (bd->goner || !blk_get_queue(bd->queue))
+ err = -ENXIO;
+ rcu_read_unlock();
+ return err;
}
static int bsg_release(struct inode *inode, struct file *file)
@@ -175,6 +181,7 @@ static void bsg_device_release(struct device *dev)
void bsg_unregister_queue(struct bsg_device *bd)
{
+ bd->goner = true;
if (bd->queue->kobj.sd)
sysfs_remove_link(&bd->queue->kobj, "bsg");
cdev_device_del(&bd->cdev, &bd->device);
Consider the following scenario: task A: open() on /dev/bsg/<something> calls chrdev_open() finds and grabs a reference to bsg_device.cdev in inode->i_cdev refcount on that cdev is 2 now (1 from creation + 1 we'd just grabbed) calls bsg_open(). fetches to_bsg_device(inode)->queue - that would be ->queue in the same bsg_device instance. gets preempted away and loses CPU before it gets to calling blk_get_queue(). task B: calls bsg_unregister_queue() on the same queue, which calls cdev_device_del(), which makes cdev impossible to look up and drops the reference to that cdev; refcount is 1 now, so nothing gets freed yet. caller of bsg_unregister_queue() proceeds to destroy the queue and free it, allowing reuse of memory that used to contain it. task A: regains CPU calls blk_get_queue() on something that no longer points to a request_queue instance. In particular, "dying" flag is no longer guaranteed to be there, so we proceed to increment what we think is a queue refcount, corrupting whatever lives in that memory now. Usually we'll end up with memory not reused yet, and blk_get_queue() will fail without buggering anything up. Not guaranteed, though... AFAICS, the fact that request_queue freeing is RCU-delayed means that it can be fixed by the following: * mark bsg_device on bsg_unregister_queue() as goner * have bsg_open() do rcu_read_lock(), then check that flag and do blk_get_queue() only if the flag hadn't been set yet. If we did not observe the flag after rcu_read_lock(), we know that the queue cannot have been freed yet - the RCU grace period couldn't have run out. Comments? Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> ---