This patch replaces one call to compute_parity with the new
raid5_issue_compute_parity. To support md_adma, a number of routines and
definitions needed to be made available in include/linux/raid/raid5.h. In
anticipation of raid6 support, the handle_stripe and release_stripe
routines were renamed with a raid5_ prefix.
Signed-off-by: Dan Williams <[email protected]>
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index ac43f98..b45720c 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -106,7 +106,7 @@ config MD_RAID10
config MD_RAID5
tristate "RAID-4/RAID-5 mode"
- depends on BLK_DEV_MD
+ depends on BLK_DEV_MD && ADMA
---help---
A RAID-5 set of N drives with a capacity of C MB per drive provides
the capacity of C * (N - 1) MB, and protects against a failure
@@ -129,7 +129,7 @@ config MD_RAID5
config MD_RAID6
tristate "RAID-6 mode"
- depends on BLK_DEV_MD
+ depends on BLK_DEV_MD && ADMA
---help---
A RAID-6 set of N drives with a capacity of C MB per drive
provides the capacity of C * (N - 2) MB, and protects
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index d3efedf..9b3e65f 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -25,8 +25,8 @@ obj-$(CONFIG_MD_LINEAR) += linear.o
obj-$(CONFIG_MD_RAID0) += raid0.o
obj-$(CONFIG_MD_RAID1) += raid1.o
obj-$(CONFIG_MD_RAID10) += raid10.o
-obj-$(CONFIG_MD_RAID5) += raid5.o xor.o
-obj-$(CONFIG_MD_RAID6) += raid6.o xor.o
+obj-$(CONFIG_MD_RAID5) += raid5.o md_adma.o xor.o
+obj-$(CONFIG_MD_RAID6) += raid6.o md_adma.o xor.o
obj-$(CONFIG_MD_MULTIPATH) += multipath.o
obj-$(CONFIG_MD_FAULTY) += faulty.o
obj-$(CONFIG_BLK_DEV_MD) += md-mod.o
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 25976bf..56a5068 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -25,31 +25,18 @@
#include <asm/atomic.h>
#include <linux/raid/bitmap.h>
-
+#include <linux/adma/md_adma.h>
/*
* Stripe cache
*/
#define NR_STRIPES 256
-#define STRIPE_SIZE PAGE_SIZE
#define STRIPE_SHIFT (PAGE_SHIFT - 9)
-#define STRIPE_SECTORS (STRIPE_SIZE>>9)
-#define IO_THRESHOLD 1
#define NR_HASH (PAGE_SIZE / sizeof(struct hlist_head))
#define HASH_MASK (NR_HASH - 1)
#define stripe_hash(conf, sect) (&((conf)->stripe_hashtbl[((sect) >> STRIPE_SHIFT) & HASH_MASK]))
-/* bio's attached to a stripe+device for I/O are linked together in bi_sector
- * order without overlap. There may be several bio's per stripe+device, and
- * a bio could span several devices.
- * When walking this list for a particular stripe+device, we must never proceed
- * beyond a bio that extends past this device, as the next bio might no longer
- * be valid.
- * This macro is used to determine the 'next' bio in the list, given the sector
- * of the current stripe+device
- */
-#define r5_next_bio(bio, sect) ( ( (bio)->bi_sector + ((bio)->bi_size>>9) < sect + STRIPE_SECTORS) ? (bio)->bi_next : NULL)
/*
* The following can be used to debug the driver
*/
@@ -101,7 +88,7 @@ static void __release_stripe(raid5_conf_
}
}
}
-static void release_stripe(struct stripe_head *sh)
+void raid5_release_stripe(struct stripe_head *sh)
{
raid5_conf_t *conf = sh->raid_conf;
unsigned long flags;
@@ -304,7 +291,7 @@ static int grow_one_stripe(raid5_conf_t
atomic_set(&sh->count, 1);
atomic_inc(&conf->active_stripes);
INIT_LIST_HEAD(&sh->lru);
- release_stripe(sh);
+ raid5_release_stripe(sh);
return 1;
}
@@ -385,7 +372,7 @@ static int raid5_end_read_request(struct
/* we can return a buffer if we bypassed the cache or
* if the top buffer is not in highmem. If there are
* multiple buffers, leave the extra work to
- * handle_stripe
+ * raid5_handle_stripe
*/
buffer = sh->bh_read[i];
if (buffer &&
@@ -448,7 +435,7 @@ static int raid5_end_read_request(struct
#endif
clear_bit(R5_LOCKED, &sh->dev[i].flags);
set_bit(STRIPE_HANDLE, &sh->state);
- release_stripe(sh);
+ raid5_release_stripe(sh);
return 0;
}
@@ -909,9 +896,8 @@ static int add_stripe_bio(struct stripe_
return 0;
}
-
/*
- * handle_stripe - do things to a stripe.
+ * raid5_handle_stripe - do things to a stripe.
*
* We lock the stripe and then examine the state of various bits
* to see what needs to be done.
@@ -921,6 +907,7 @@ static int add_stripe_bio(struct stripe_
* schedule a read on some buffers
* schedule a write of some buffers
* return confirmation of parity correctness
+ * issue async stripe operations (xor, memcpy, memset) TBI
*
* Parity calculations are done inside the stripe lock
* buffers are taken off read_list or write_list, and bh_cache buffers
@@ -928,7 +915,7 @@ static int add_stripe_bio(struct stripe_
*
*/
-static void handle_stripe(struct stripe_head *sh)
+void raid5_handle_stripe(struct stripe_head *sh)
{
raid5_conf_t *conf = sh->raid_conf;
int disks = conf->raid_disks;
@@ -937,6 +924,7 @@ static void handle_stripe(struct stripe_
int i;
int syncing;
int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0;
+ int in_write=0, xor=0;
int non_overwrite = 0;
int failed_num=0;
struct r5dev *dev;
@@ -958,8 +946,10 @@ static void handle_stripe(struct stripe_
dev = &sh->dev[i];
clear_bit(R5_Insync, &dev->flags);
- PRINTK("check %d: state 0x%lx read %p write %p written %p\n",
- i, dev->flags, dev->toread, dev->towrite, dev->written);
+ PRINTK("check %d: state 0x%lx read %p write %p written %p"
+ " in_write %p\n",
+ i, dev->flags, dev->toread, dev->towrite, dev->written,
+ dev->inwrite);
/* maybe we can reply to a read */
if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) {
struct bio *rbi, *rbi2;
@@ -986,6 +976,7 @@ static void handle_stripe(struct stripe_
/* now count some things */
if (test_bit(R5_LOCKED, &dev->flags)) locked++;
if (test_bit(R5_UPTODATE, &dev->flags)) uptodate++;
+ if (test_bit(R5_InAsyncXor, &dev->flags)) xor++;
if (dev->toread) to_read++;
@@ -995,6 +986,7 @@ static void handle_stripe(struct stripe_
non_overwrite++;
}
if (dev->written) written++;
+ if (dev->inwrite) in_write++;
rdev = rcu_dereference(conf->disks[i].rdev);
if (!rdev || !test_bit(In_sync, &rdev->flags)) {
/* The ReadError flag will just be confusing now */
@@ -1253,26 +1245,17 @@ static void handle_stripe(struct stripe_
}
}
}
- /* now if nothing is locked, and if we have enough data, we can start a write request */
- if (locked == 0 && (rcw == 0 ||rmw == 0) &&
+ /* now if nothing is locked, and if we have enough data, we can start a write request
+ * note: if an xor calculation is in progress we can still submit the write, as we rely
+ * on the xor engine to keep the operations in order
+ */
+ if ((locked == 0 || xor) && (rcw == 0 || rmw == 0) &&
!test_bit(STRIPE_BIT_DELAY, &sh->state)) {
PRINTK("Computing parity...\n");
- compute_parity(sh, rcw==0 ? RECONSTRUCT_WRITE : READ_MODIFY_WRITE);
- /* now every locked buffer is ready to be written */
- for (i=disks; i--;)
- if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
- PRINTK("Writing block %d\n", i);
- locked++;
- set_bit(R5_Wantwrite, &sh->dev[i].flags);
- if (!test_bit(R5_Insync, &sh->dev[i].flags)
- || (i==sh->pd_idx && failed == 0))
- set_bit(STRIPE_INSYNC, &sh->state);
- }
- if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
- atomic_dec(&conf->preread_active_stripes);
- if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
- md_wakeup_thread(conf->mddev->thread);
- }
+ locked += raid5_issue_compute_parity(sh, rcw==0 ? RECONSTRUCT_WRITE : READ_MODIFY_WRITE);
+ /* once R5_InAsyncXor is dropped every locked buffer will be
+ * ready to be written. This is handled by the callback: raid5_adma_callback
+ */
}
}
@@ -1571,14 +1554,14 @@ static int make_request (request_queue_t
* and wait a while
*/
raid5_unplug_device(mddev->queue);
- release_stripe(sh);
+ raid5_release_stripe(sh);
schedule();
goto retry;
}
finish_wait(&conf->wait_for_overlap, &w);
raid5_plug_device(conf);
- handle_stripe(sh);
- release_stripe(sh);
+ raid5_handle_stripe(sh);
+ raid5_release_stripe(sh);
} else {
/* cannot get stripe for read-ahead, just give-up */
@@ -1669,8 +1652,8 @@ static sector_t sync_request(mddev_t *md
clear_bit(STRIPE_INSYNC, &sh->state);
spin_unlock(&sh->lock);
- handle_stripe(sh);
- release_stripe(sh);
+ raid5_handle_stripe(sh);
+ raid5_release_stripe(sh);
return STRIPE_SECTORS;
}
@@ -1725,8 +1708,8 @@ static void raid5d (mddev_t *mddev)
spin_unlock_irq(&conf->device_lock);
handled++;
- handle_stripe(sh);
- release_stripe(sh);
+ raid5_handle_stripe(sh);
+ raid5_release_stripe(sh);
spin_lock_irq(&conf->device_lock);
}
@@ -1918,7 +1901,16 @@ static int run(mddev_t *mddev)
mdname(mddev));
goto abort;
}
+
}
+
+ if (!(conf->adma_engine = adma_engine_init())) {
+ printk(KERN_ERR
+ "raid5: couldn't initialize adma engine for %s\n",
+ mdname(mddev));
+ goto abort;
+ }
+
memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
if (grow_stripes(conf, conf->max_nr_stripes)) {
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index 394da82..968f40c 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -3,6 +3,7 @@
#include <linux/raid/md.h>
#include <linux/raid/xor.h>
+#include <linux/adma/md_adma.h>
/*
*
@@ -133,13 +134,14 @@ struct stripe_head {
int pd_idx; /* parity disk index */
unsigned long state; /* state flags */
atomic_t count; /* nr of active thread/requests */
+ atomic_t xor_count; /* nr of async adma ops in flight */
spinlock_t lock;
int bm_seq; /* sequence number for bitmap flushes */
struct r5dev {
struct bio req;
struct bio_vec vec;
struct page *page;
- struct bio *toread, *towrite, *written;
+ struct bio *toread, *towrite, *written, *inwrite;
sector_t sector; /* sector of this page */
unsigned long flags;
} dev[1]; /* allocated with extra space depending of RAID geometry */
@@ -155,6 +157,7 @@ struct stripe_head {
#define R5_Overlap 7 /* There is a pending overlapping request on this block */
#define R5_ReadError 8 /* seen a read error here recently */
#define R5_ReWrite 9 /* have tried to over-write the readerror */
+#define R5_InAsyncXor 10 /* softens R5_LOCKED to allow queueing additional ops */
/*
* Write method
@@ -163,6 +166,8 @@ struct stripe_head {
#define READ_MODIFY_WRITE 2
/* not a write method, but a compute_parity mode */
#define CHECK_PARITY 3
+/* not a write method, but a xor operation recognized by raid5_adma_callback */
+#define COMPUTE_BLOCK 4
/*
* Stripe state
@@ -215,6 +220,7 @@ struct raid5_private_data {
struct list_head delayed_list; /* stripes that have plugged requests */
struct list_head bitmap_list; /* stripes delaying awaiting bitmap update */
atomic_t preread_active_stripes; /* stripes with scheduled io */
+ adma_engine_t *adma_engine; /* async data operations engine handle */
char cache_name[20];
kmem_cache_t *slab_cache; /* for allocating stripes */
@@ -246,6 +252,8 @@ struct raid5_private_data {
typedef struct raid5_private_data raid5_conf_t;
#define mddev_to_conf(mddev) ((raid5_conf_t *) mddev->private)
+void raid5_handle_stripe(struct stripe_head *sh);
+void raid5_release_stripe(struct stripe_head *sh);
/*
* Our supported algorithms
@@ -255,4 +263,21 @@ typedef struct raid5_private_data raid5_
#define ALGORITHM_LEFT_SYMMETRIC 2
#define ALGORITHM_RIGHT_SYMMETRIC 3
+/* Definitions needed by md_adma */
+
+/* bio's attached to a stripe+device for I/O are linked together in bi_sector
+ * order without overlap. There may be several bio's per stripe+device, and
+ * a bio could span several devices.
+ * When walking this list for a particular stripe+device, we must never proceed
+ * beyond a bio that extends past this device, as the next bio might no longer
+ * be valid.
+ * This macro is used to determine the 'next' bio in the list, given the sector
+ * of the current stripe+device
+ */
+#define r5_next_bio(bio, sect) ( ( (bio)->bi_sector + ((bio)->bi_size>>9) < sect + STRIPE_SECTORS) ? (bio)->bi_next : NULL)
+
+#define IO_THRESHOLD 1
+#define STRIPE_SIZE PAGE_SIZE
+#define STRIPE_SECTORS (STRIPE_SIZE>>9)
+
#endif
diff --git a/drivers/Kconfig b/drivers/Kconfig
index bddf431..c887854 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -68,6 +68,8 @@ source "drivers/infiniband/Kconfig"
source "drivers/sn/Kconfig"
+source "drivers/adma/Kconfig"
+
source "drivers/edac/Kconfig"
endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 619dd96..df547d5 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -73,3 +73,4 @@ obj-$(CONFIG_SGI_IOC4) += sn/
obj-y += firmware/
obj-$(CONFIG_CRYPTO) += crypto/
obj-$(CONFIG_SUPERH) += sh/
+obj-$(CONFIG_ADMA) += adma/
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
[Index of Archives]
[Kernel Newbies]
[Netfilter]
[Bugtraq]
[Photo]
[Stuff]
[Gimp]
[Yosemite News]
[MIPS Linux]
[ARM Linux]
[Linux Security]
[Linux RAID]
[Video 4 Linux]
[Linux for the blind]
[Linux Resources]