Andrew, please hold off on this; Al Viro doesn't agree with it.
Calling get_gendisk() from bdget() changes the bdget() semantics too
much. For one, it enables bdget() to load modules.
Al proposed the following approach. Neil, can you agree with this too?
---
Avoid the nesting of bd_mutex by serializing the locks. This is made
easier by changing the ->bd_part_count rules; it's now only changed for
the first openers/closers.
Signed-off-by: Peter Zijlstra <[email protected]>
---
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 92de28d..0ffc4f0 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -898,12 +889,15 @@ void bd_set_size(struct block_device *bd
}
EXPORT_SYMBOL(bd_set_size);
+static int __blkdev_put(struct block_device *bdev, int part);
+
static int do_open(struct block_device *bdev, struct file *file)
{
struct module *owner = NULL;
struct gendisk *disk;
int ret = -ENXIO;
int part;
+ struct block_device *whole = NULL;
file->f_mapping = bdev->bd_inode->i_mapping;
lock_kernel();
@@ -937,30 +931,42 @@ static int do_open(struct block_device *
rescan_partitions(disk, bdev);
} else {
struct hd_struct *p;
- struct block_device *whole;
+
+ mutex_unlock(&bdev->bd_mutex);
+
whole = bdget_disk(disk, 0);
ret = -ENOMEM;
if (!whole)
- goto out_first;
+ goto out_first_lock;
ret = blkdev_get(whole, file->f_mode, file->f_flags);
if (ret)
- goto out_first;
- bdev->bd_contains = whole;
+ goto out_first_lock;
+
mutex_lock(&whole->bd_mutex);
whole->bd_part_count++;
p = disk->part[part - 1];
- bdev->bd_inode->i_data.backing_dev_info =
- whole->bd_inode->i_data.backing_dev_info;
if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) {
- whole->bd_part_count--;
mutex_unlock(&whole->bd_mutex);
ret = -ENXIO;
- goto out_first;
+ goto out_first_lock;
}
kobject_get(&p->kobj);
- bdev->bd_part = p;
- bd_set_size(bdev, (loff_t) p->nr_sects << 9);
mutex_unlock(&whole->bd_mutex);
+
+ mutex_lock(&bdev->bd_mutex);
+ if (bdev->bd_contains != whole) {
+ bdev->bd_contains = whole;
+ bdev->bd_inode->i_data.backing_dev_info =
+ whole->bd_inode->i_data.backing_dev_info;
+ bdev->bd_part = p;
+ bd_set_size(bdev, (loff_t) p->nr_sects << 9);
+ whole = NULL;
+ } else {
+ mutex_unlock(&bdev->bd_mutex);
+ kobject_put(&p->kobj);
+ __blkdev_put(whole, 1);
+ mutex_lock(&bdev->bd_mutex);
+ }
}
} else {
put_disk(disk);
@@ -973,10 +979,6 @@ static int do_open(struct block_device *
}
if (bdev->bd_invalidated)
rescan_partitions(bdev->bd_disk, bdev);
- } else {
- mutex_lock(&bdev->bd_contains->bd_mutex);
- bdev->bd_contains->bd_part_count++;
- mutex_unlock(&bdev->bd_contains->bd_mutex);
}
}
bdev->bd_openers++;
@@ -984,11 +986,12 @@ static int do_open(struct block_device *
unlock_kernel();
return 0;
+out_first_lock:
+ if (whole)
+ __blkdev_put(whole, 1);
+ mutex_lock(&bdev->bd_mutex);
out_first:
bdev->bd_disk = NULL;
- bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
- if (bdev != bdev->bd_contains)
- blkdev_put(bdev->bd_contains);
bdev->bd_contains = NULL;
put_disk(disk);
module_put(owner);
@@ -1049,14 +1052,17 @@ static int blkdev_open(struct inode * in
return res;
}
-int blkdev_put(struct block_device *bdev)
+static int __blkdev_put(struct block_device *bdev, int part)
{
int ret = 0;
struct inode *bd_inode = bdev->bd_inode;
struct gendisk *disk = bdev->bd_disk;
+ struct block_device *victim = NULL;
mutex_lock(&bdev->bd_mutex);
lock_kernel();
+ if (part)
+ bdev->bd_part_count--;
if (!--bdev->bd_openers) {
sync_blockdev(bdev);
kill_bdev(bdev);
@@ -1064,10 +1070,6 @@ int blkdev_put(struct block_device *bdev
if (bdev->bd_contains == bdev) {
if (disk->fops->release)
ret = disk->fops->release(bd_inode, NULL);
- } else {
- mutex_lock(&bdev->bd_contains->bd_mutex);
- bdev->bd_contains->bd_part_count--;
- mutex_unlock(&bdev->bd_contains->bd_mutex);
}
if (!bdev->bd_openers) {
struct module *owner = disk->fops->owner;
@@ -1081,17 +1083,23 @@ int blkdev_put(struct block_device *bdev
}
bdev->bd_disk = NULL;
bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
- if (bdev != bdev->bd_contains) {
- blkdev_put(bdev->bd_contains);
- }
+ if (bdev != bdev->bd_contains)
+ victim = bdev->bd_contains;
bdev->bd_contains = NULL;
}
unlock_kernel();
mutex_unlock(&bdev->bd_mutex);
+ if (victim)
+ __blkdev_put(victim, 1);
bdput(bdev);
return ret;
}
+int blkdev_put(struct block_device *bdev)
+{
+ return __blkdev_put(bdev, 0);
+}
+
EXPORT_SYMBOL(blkdev_put);
static int blkdev_close(struct inode * inode, struct file * filp)
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
[Index of Archives]
[Kernel Newbies]
[Netfilter]
[Bugtraq]
[Photo]
[Stuff]
[Gimp]
[Yosemite News]
[MIPS Linux]
[ARM Linux]
[Linux Security]
[Linux RAID]
[Video 4 Linux]
[Linux for the blind]
[Linux Resources]