Re: [patch] speed up single bio_vec allocation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Dec 06 2006, Jens Axboe wrote:
> I still can't help but think we can do better than this, and that this
> is nothing more than optimizing for a benchmark. For high performance
> I/O, you will be doing > 1 page bio's anyway and this patch wont help
> you at all. Perhaps we can just kill bio_vec slabs completely, and
> create bio slabs instead with differing sizes. So instead of having 1
> bio slab and 5 bio_vec slabs, change that to 5 bio slabs that leave room
> for the bio_vec list at the end. That would always eliminate the extra
> allocation, at the cost of blowing the 256-page case into a order 1 page
> allocation (256*16 + sizeof(*bio) > PAGE_SIZE) for the 4kb 64-bit archs,
> which is something I've always tried to avoid.

That would look something like this. You'd probably want to re-tweak the
slab sizes now, so that we get the most out of the slab page. If we
accept the 2^1 order allocation for the largest bio, we can make it
larger than 256 pages at no extra cost. Probably around 500 pages would
still fit inside the two pages.

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 08a40f4..b4bf3b3 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -813,7 +813,7 @@ static int crypt_ctr(struct dm_target *t
 		goto bad4;
 	}
 
-	cc->bs = bioset_create(MIN_IOS, MIN_IOS, 4);
+	cc->bs = bioset_create(MIN_IOS, 4);
 	if (!cc->bs) {
 		ti->error = "Cannot allocate crypt bioset";
 		goto bad_bs;
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index da663d2..581c24a 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -60,7 +60,7 @@ static int resize_pool(unsigned int new_
 		if (!_io_pool)
 			return -ENOMEM;
 
-		_bios = bioset_create(16, 16, 4);
+		_bios = bioset_create(16, 4);
 		if (!_bios) {
 			mempool_destroy(_io_pool);
 			_io_pool = NULL;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index fc4f743..98f6768 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -973,7 +973,7 @@ static struct mapped_device *alloc_dev(i
 	if (!md->tio_pool)
 		goto bad3;
 
-	md->bs = bioset_create(16, 16, 4);
+	md->bs = bioset_create(16, 4);
 	if (!md->bs)
 		goto bad_no_bioset;
 
diff --git a/fs/bio.c b/fs/bio.c
index aa4d09b..452e8f7 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -30,10 +30,6 @@
 
 #define BIO_POOL_SIZE 256
 
-static kmem_cache_t *bio_slab __read_mostly;
-
-#define BIOVEC_NR_POOLS 6
-
 /*
  * a small number of entries is fine, not going to be performance critical.
  * basically we just need to survive
@@ -41,23 +37,23 @@ static kmem_cache_t *bio_slab __read_mos
 #define BIO_SPLIT_ENTRIES 8	
 mempool_t *bio_split_pool __read_mostly;
 
-struct biovec_slab {
+struct bio_slab {
 	int nr_vecs;
-	char *name; 
+	const char *name;
 	kmem_cache_t *slab;
 };
 
-/*
- * if you change this list, also change bvec_alloc or things will
- * break badly! cannot be bigger than what you can fit into an
- * unsigned short
- */
-
-#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
-static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
-	BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
+#define BIOSLAB_NR_POOLS 6
+#define BIOSLAB(x) { .nr_vecs = x, .name = "bio-"__stringify(x) }
+static struct bio_slab bio_slabs[BIOSLAB_NR_POOLS] __read_mostly = {
+	BIOSLAB(0),
+	BIOSLAB(4),
+	BIOSLAB(16),
+	BIOSLAB(64),
+	BIOSLAB(128),
+	BIOSLAB(BIO_MAX_PAGES),
 };
-#undef BV
+#undef BIOSLAB
 
 /*
  * bio_set is used to allow other portions of the IO system to
@@ -66,8 +62,7 @@ static struct biovec_slab bvec_slabs[BIO
  * and the bvec_slabs[].
  */
 struct bio_set {
-	mempool_t *bio_pool;
-	mempool_t *bvec_pools[BIOVEC_NR_POOLS];
+	mempool_t *bio_pools[BIOSLAB_NR_POOLS];
 };
 
 /*
@@ -76,45 +71,12 @@ struct bio_set {
  */
 static struct bio_set *fs_bio_set;
 
-static inline struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs)
-{
-	struct bio_vec *bvl;
-
-	/*
-	 * see comment near bvec_array define!
-	 */
-	switch (nr) {
-		case   1        : *idx = 0; break;
-		case   2 ...   4: *idx = 1; break;
-		case   5 ...  16: *idx = 2; break;
-		case  17 ...  64: *idx = 3; break;
-		case  65 ... 128: *idx = 4; break;
-		case 129 ... BIO_MAX_PAGES: *idx = 5; break;
-		default:
-			return NULL;
-	}
-	/*
-	 * idx now points to the pool we want to allocate from
-	 */
-
-	bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
-	if (bvl) {
-		struct biovec_slab *bp = bvec_slabs + *idx;
-
-		memset(bvl, 0, bp->nr_vecs * sizeof(struct bio_vec));
-	}
-
-	return bvl;
-}
-
 void bio_free(struct bio *bio, struct bio_set *bio_set)
 {
 	const int pool_idx = BIO_POOL_IDX(bio);
 
-	BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
-
-	mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
-	mempool_free(bio, bio_set->bio_pool);
+	BIO_BUG_ON(pool_idx >= BIOSLAB_NR_POOLS);
+	mempool_free(bio, bio_set->bio_pools[pool_idx]);
 }
 
 /*
@@ -160,26 +122,51 @@ void bio_init(struct bio *bio)
  **/
 struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 {
-	struct bio *bio = mempool_alloc(bs->bio_pool, gfp_mask);
+	struct bio *bio = NULL;
+	long idx;
+
+	switch (nr_iovecs) {
+	case   0:
+		idx = 0;
+		break;
+	case   1 ...   4:
+		idx = 1;
+		break;
+	case   5 ...  16:
+		idx = 2;
+		break;
+	case  17 ...  64:
+		idx = 3;
+		break;
+	case  65 ... 128:
+		idx = 4;
+		break;
+	case 129 ... BIO_MAX_PAGES:
+		idx = 5;
+		break;
+	default:
+		goto out;
+	}
+
+	bio = mempool_alloc(bs->bio_pools[idx], gfp_mask);
 
 	if (likely(bio)) {
 		struct bio_vec *bvl = NULL;
 
 		bio_init(bio);
-		if (likely(nr_iovecs)) {
-			unsigned long idx = 0; /* shut up gcc */
-
-			bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
-			if (unlikely(!bvl)) {
-				mempool_free(bio, bs->bio_pool);
-				bio = NULL;
-				goto out;
-			}
+
+		if (nr_iovecs) {
+			struct bio_slab *bslab = &bio_slabs[idx];
+
+			bvl = (void *) bio + sizeof(*bio);
+			memset(bvl, 0, bslab->nr_vecs * sizeof(struct bio_vec));
+
 			bio->bi_flags |= idx << BIO_POOL_OFFSET;
-			bio->bi_max_vecs = bvec_slabs[idx].nr_vecs;
+			bio->bi_max_vecs = bslab->nr_vecs;
 		}
 		bio->bi_io_vec = bvl;
 	}
+
 out:
 	return bio;
 }
@@ -1120,89 +1107,78 @@ struct bio_pair *bio_split(struct bio *b
  * create memory pools for biovec's in a bio_set.
  * use the global biovec slabs created for general use.
  */
-static int biovec_create_pools(struct bio_set *bs, int pool_entries, int scale)
+static int bio_create_pools(struct bio_set *bs, int pool_entries, int scale)
 {
 	int i;
 
-	for (i = 0; i < BIOVEC_NR_POOLS; i++) {
-		struct biovec_slab *bp = bvec_slabs + i;
-		mempool_t **bvp = bs->bvec_pools + i;
+	for (i = 0; i < BIOSLAB_NR_POOLS; i++) {
+		struct bio_slab *bslab = bio_slabs + i;
+		mempool_t **bvp = bs->bio_pools + i;
 
 		if (pool_entries > 1 && i >= scale)
 			pool_entries >>= 1;
 
-		*bvp = mempool_create_slab_pool(pool_entries, bp->slab);
+		*bvp = mempool_create_slab_pool(pool_entries, bslab->slab);
 		if (!*bvp)
 			return -ENOMEM;
 	}
 	return 0;
 }
 
-static void biovec_free_pools(struct bio_set *bs)
+static void bio_free_pools(struct bio_set *bs)
 {
 	int i;
 
-	for (i = 0; i < BIOVEC_NR_POOLS; i++) {
-		mempool_t *bvp = bs->bvec_pools[i];
+	for (i = 0; i < BIOSLAB_NR_POOLS; i++) {
+		mempool_t *bp = bs->bio_pools[i];
 
-		if (bvp)
-			mempool_destroy(bvp);
+		if (bp)
+			mempool_destroy(bp);
 	}
 
 }
 
 void bioset_free(struct bio_set *bs)
 {
-	if (bs->bio_pool)
-		mempool_destroy(bs->bio_pool);
-
-	biovec_free_pools(bs);
+	bio_free_pools(bs);
 
 	kfree(bs);
 }
 
-struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size, int scale)
+struct bio_set *bioset_create(int bio_pool_size, int scale)
 {
 	struct bio_set *bs = kzalloc(sizeof(*bs), GFP_KERNEL);
 
 	if (!bs)
 		return NULL;
 
-	bs->bio_pool = mempool_create_slab_pool(bio_pool_size, bio_slab);
-	if (!bs->bio_pool)
-		goto bad;
-
-	if (!biovec_create_pools(bs, bvec_pool_size, scale))
+	if (!bio_create_pools(bs, bio_pool_size, scale))
 		return bs;
 
-bad:
 	bioset_free(bs);
 	return NULL;
 }
 
-static void __init biovec_init_slabs(void)
+static void __init bio_init_slabs(void)
 {
 	int i;
 
-	for (i = 0; i < BIOVEC_NR_POOLS; i++) {
+	for (i = 0; i < BIOSLAB_NR_POOLS; i++) {
 		int size;
-		struct biovec_slab *bvs = bvec_slabs + i;
+		struct bio_slab *bs = bio_slabs + i;
 
-		size = bvs->nr_vecs * sizeof(struct bio_vec);
-		bvs->slab = kmem_cache_create(bvs->name, size, 0,
-                                SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+		size = bs->nr_vecs * sizeof(struct bio_vec) + sizeof(struct bio);
+		bs->slab = kmem_cache_create(bs->name, size, 0, SLAB_PANIC,
+						NULL, NULL);
 	}
 }
 
 static int __init init_bio(void)
 {
-	int megabytes, bvec_pool_entries;
-	int scale = BIOVEC_NR_POOLS;
-
-	bio_slab = kmem_cache_create("bio", sizeof(struct bio), 0,
-				SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+	int scale = BIOSLAB_NR_POOLS;
+	int megabytes;
 
-	biovec_init_slabs();
+	bio_init_slabs();
 
 	megabytes = nr_free_pages() >> (20 - PAGE_SHIFT);
 
@@ -1225,9 +1201,8 @@ static int __init init_bio(void)
 	 * the system is completely unable to allocate memory, so we only
 	 * need enough to make progress.
 	 */
-	bvec_pool_entries = 1 + scale;
 
-	fs_bio_set = bioset_create(BIO_POOL_SIZE, bvec_pool_entries, scale);
+	fs_bio_set = bioset_create(BIO_POOL_SIZE, scale);
 	if (!fs_bio_set)
 		panic("bio: can't allocate bios\n");
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 092dbd0..d6d0047 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -276,7 +276,7 @@ extern struct bio_pair *bio_split(struct
 extern mempool_t *bio_split_pool;
 extern void bio_pair_release(struct bio_pair *dbio);
 
-extern struct bio_set *bioset_create(int, int, int);
+extern struct bio_set *bioset_create(int, int);
 extern void bioset_free(struct bio_set *);
 
 extern struct bio *bio_alloc(gfp_t, int);

-- 
Jens Axboe

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux