Explain the stages of resize_stripes so that it is clear what is
happening, why GFP_NOIO is needed, and how -ENOMEM is handled.
Also move the releasing of old stripes and the old kmem_cache
earlier and lose the need for 'oldstripes'.
Signed-off-by: Neil Brown <[email protected]>
### Diffstat output
./drivers/md/raid5.c | 51 ++++++++++++++++++++++++++++++++-------------------
1 file changed, 32 insertions(+), 19 deletions(-)
diff ./drivers/md/raid5.c~current~ ./drivers/md/raid5.c
--- ./drivers/md/raid5.c~current~ 2006-03-17 18:18:19.000000000 +1100
+++ ./drivers/md/raid5.c 2006-03-17 18:18:32.000000000 +1100
@@ -334,19 +334,31 @@ static int grow_stripes(raid5_conf_t *co
}
static int resize_stripes(raid5_conf_t *conf, int newsize)
{
- /* make all the stripes able to hold 'newsize' devices.
+ /* Make all the stripes able to hold 'newsize' devices.
* New slots in each stripe get 'page' set to a new page.
- * We allocate all the new stripes first, then if that succeeds,
- * copy everything across.
- * Finally we add new pages. This could fail, but we leave
- * the stripe cache at it's new size, just with some pages empty.
*
- * We use GFP_NOIO allocations as IO to the raid5 is blocked
- * at some points in this operation.
+ * This happens in stages:
+ * 1/ create a new kmem_cache and allocate the required number of
+ * stripe_heads.
+ * 2/ gather all the old stripe_heads and transfer the pages across
+ * to the new stripe_heads. This will have the side effect of
+ * freezing the array as once all stripe_heads have been collected,
+ * no IO will be possible. Old stripe heads are freed once their
+ * pages have been transferred over, and the old kmem_cache is
+ * freed when all stripes are done.
+ * 3/ reallocate conf->disks to be suitably bigger. If this fails,
+ * we simply return a failure status - no need to clean anything up.
+ * 4/ allocate new pages for the new slots in the new stripe_heads.
+ * If this fails, we don't bother trying to shrink the
+ * stripe_heads down again, we just leave them as they are.
+ * As each stripe_head is processed the new one is released into
+ * active service.
+ *
+ * Once step2 is started, we cannot afford to wait for a write,
+ * so we use GFP_NOIO allocations.
*/
struct stripe_head *osh, *nsh;
LIST_HEAD(newstripes);
- LIST_HEAD(oldstripes);
struct disk_info *ndisks;
int err = 0;
kmem_cache_t *sc;
@@ -355,6 +367,7 @@ static int resize_stripes(raid5_conf_t *
if (newsize <= conf->pool_size)
return 0; /* never bother to shrink */
+ /* Step 1 */
sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev),
0, 0, NULL, NULL);
@@ -362,7 +375,7 @@ static int resize_stripes(raid5_conf_t *
return -ENOMEM;
for (i = conf->max_nr_stripes; i; i--) {
- nsh = kmem_cache_alloc(sc, GFP_NOIO);
+ nsh = kmem_cache_alloc(sc, GFP_KERNEL);
if (!nsh)
break;
@@ -383,7 +396,8 @@ static int resize_stripes(raid5_conf_t *
kmem_cache_destroy(sc);
return -ENOMEM;
}
- /* OK, we have enough stripes, start collecting inactive
+ /* Step 2 - Must use GFP_NOIO now.
+ * OK, we have enough stripes, start collecting inactive
* stripes and copying them over
*/
list_for_each_entry(nsh, &newstripes, lru) {
@@ -400,10 +414,11 @@ static int resize_stripes(raid5_conf_t *
nsh->dev[i].page = osh->dev[i].page;
for( ; i<newsize; i++)
nsh->dev[i].page = NULL;
- list_add(&osh->lru, &oldstripes);
+ kmem_cache_free(conf->slab_cache, osh);
}
- /* Got them all.
- * Return the new ones and free the old ones.
+ kmem_cache_destroy(conf->slab_cache);
+
+ /* Step 3.
* At this point, we are holding all the stripes so the array
* is completely stalled, so now is a good time to resize
* conf->disks.
@@ -416,6 +431,8 @@ static int resize_stripes(raid5_conf_t *
conf->disks = ndisks;
} else
err = -ENOMEM;
+
+ /* Step 4, return new stripes to service */
while(!list_empty(&newstripes)) {
nsh = list_entry(newstripes.next, struct stripe_head, lru);
list_del_init(&nsh->lru);
@@ -428,12 +445,8 @@ static int resize_stripes(raid5_conf_t *
}
release_stripe(nsh);
}
- while(!list_empty(&oldstripes)) {
- osh = list_entry(oldstripes.next, struct stripe_head, lru);
- list_del(&osh->lru);
- kmem_cache_free(conf->slab_cache, osh);
- }
- kmem_cache_destroy(conf->slab_cache);
+ /* critical section passed, GFP_NOIO no longer needed */
+
conf->slab_cache = sc;
conf->active_name = 1-conf->active_name;
conf->pool_size = newsize;
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
[Index of Archives]
[Kernel Newbies]
[Netfilter]
[Bugtraq]
[Photo]
[Stuff]
[Gimp]
[Yosemite News]
[MIPS Linux]
[ARM Linux]
[Linux Security]
[Linux RAID]
[Video 4 Linux]
[Linux for the blind]
[Linux Resources]