2.6.17 odd hd slow down

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi all,
experimenting a little the 2.6 block device layer I detected under some
circumstances a net slowness in the disk throughput. Strangely enough, in fact,
my IDE disk reported a significant performance drop off in correspondence of
certain access patterns.

Following further investigations I was able to simulate this ill behavior in
the following piece of code, clearly showing a non negligible hard-disk slow
down when the step value is set greater than 8. These result in fact far below
the hard-disk real speed (30~70MB/sec), as correctly measured instead in
correspondence of low STEP values (<8). In particular, with step of 512 or
above, the overall performance scored by the disk results below 2MB/sec.

At first I thought to a side-effect of the queue plug/unplug mechanism: the
scattered accesses involve the unplug timeout to each bio. So, I added the
BIO_RW_SYNC flag that - AFAIK - should force the queue
unplugging. Unfortunately nothing changes.

Now, as it is quite possible that I'm missing something, the question is: is
there an effective way of doing scattered disk accesses using bios? In other
words, how can I fix the following program in order to get disk full speed for
steps > 8?

TIA!
Damon

PS: please find below several results corresponding to various steps/scheduler
combinations, along with some configuration specs.

# hdparm -i /dev/hda

ATA device, with non-removable media
        Model Number:       Maxtor 6Y080P0
        Firmware Revision:  YAR41BW0
Standards:
        Supported: 7 6 5 4
        Likely used: 7
Configuration:
        Logical         max     current
        cylinders       16383   16383
        heads           16      16
        sectors/track   63      63
        --
        CHS current addressable sectors:   16514064
        LBA    user addressable sectors:  160086528
        device size with M = 1024*1024:       78167 MBytes
        device size with M = 1000*1000:       81964 MBytes (81 GB)
Capabilities:
        LBA, IORDY(can be disabled)
        Queue depth: 1
        Standby timer values: spec'd by Standard, no device specific minimum
        R/W multiple sector transfer: Max = 16  Current = 16
        Advanced power management level: unknown setting (0x0000)
        Recommended acoustic management value: 192, current value: 254
        DMA: mdma0 mdma1 mdma2 udma0 udma1 udma2 udma3 udma4 *udma5 udma6
             Cycle time: min=120ns recommended=120ns
        PIO: pio0 pio1 pio2 pio3 pio4
             Cycle time: no flow control=120ns  IORDY flow control=120ns

# uname -a
Linux 2.6.17.1 #2 SMP PREEMPT i686 Intel(R) Xeon(TM) CPU 2.80GHz GNU/Linux

ANTICIPATORY SCHEDULER

STEP (hs)	CYCLES		WRITTEN (MB)	ELAPSED (s)	SPEED (MB/s)
1		61954		242		3		75.432
2		59394		232		3		71.3032
3		16473		64		3		21.843
4		52482		205		3		62.3135
5		14448		56		3		18.1951
6		13617		53		3		17.1732
7		12849		50		3		16.1695
8		47874		187		3		56.2823
9		2569		10		3		3.468
10		2608		10		3		3.716
11		2416		9		3		2.3085
12		2576		10		3		3.468
13		2480		9		3		3.222
14		2424		9		3		2.3084
15		2616		10		3		3.738
16		2288		8		3		2.2619
32		2376		9		3		2.2849
64		2400		9		3		2.3059
128		2408		9		3		2.3098
256		1384		5		3		1.2104
512		1048		4		3		1.761

DEADLINE SCHEDULER

STEP (hs)	CYCLES		WRITTEN (MB)	ELAPSED (s)	SPEED (MB/s)
1		61955		242		3		75.736
2		59907		234		3		72.1307
3		16473		64		3		21.843
4		52994		207		3		63.1816
5		14330		55		3		18.1526
6		13569		53		3		17.1476
7		12817		50		3		16.1618
8		47618		186		3		56.1991
9		2625		10		3		3.734
10		2472		9		3		3.185
11		2512		9		3		3.371
12		2624		10		3		3.764
13		2392		9		3		2.3051
14		2472		9		3		2.3214
15		2664		10		3		3.863
16		2512		9		3		3.305
32		2448		9		3		3.10
64		2520		9		3		3.375
128		2417		9		3		2.3017
256		1305		5		3		1.1776
512		1160		4		3		1.1258

CFQ SCHEDULER

STEP (hs)	CYCLES		WRITTEN (MB)	ELAPSED (s)	SPEED (MB/s)
1		62850		245		3		76.1395
2		60416		236		3		73.940
3		15970		62		3		20.1902
4		53225		207		3		63.2719
5		14945		58		3		19.865
6		14250		55		3		18.1160
7		13682		53		3		17.1986
8		47870		186		3		56.2472
9		2529		9		3		3.170
10		2576		10		3		3.477
11		2472		9		3		3.44
12		2672		10		3		3.933
13		2481		9		3		3.256
14		2592		10		3		3.627
15		2512		9		3		3.386
16		2688		10		3		3.1008
32		2384		9		3		2.2996
64		2320		9		3		2.2734
128		2720		10		3		3.1130
256		1265		4		3		1.1664
512		1088		4		3		1.768

NOOP SCHEDULER

STEP (hs)	CYCLES		WRITTEN (MB)	ELAPSED (s)	SPEED (MB/s)
1		20987		81		3		27.413
2		19974		78		3		25.2373
3		16434		64		3		21.712
4		18541		72		3		23.2482
5		14217		55		3		18.1067
6		13625		53		3		17.1729
7		12489		48		3		16.337
8		48898		191		3		57.3135
9		2560		10		3		3.499
10		2568		10		3		3.332
11		2472		9		3		3.161
12		2568		10		3		3.371
13		2352		9		3		2.2875
14		2584		10		3		3.487
15		2320		9		3		2.2740
16		2544		9		3		3.481
32		2344		9		3		2.2832
64		2416		9		3		2.3069
128		2328		9		3		2.2649
256		1360		5		3		1.2010
512		1440		5		3		1.2190

--- empty       2006-09-05 00:16:24.000000000 +0200
+++ test.c      2006-09-05 00:16:49.000000000 +0200
@@ -0,0 +1,145 @@
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/bio.h>
+
+#define START(t) ({                                            \
+               struct timeval __tv;                            \
+               do_gettimeofday(&__tv);                         \
+               (t) = timeval_to_ns(&__tv);                     \
+       })
+
+#define STOP(t) ({                                             \
+               struct timeval __tv;                            \
+               do_gettimeofday(&__tv);                         \
+               (t) = timeval_to_ns(&__tv) - (t);               \
+       })
+
+DECLARE_WAIT_QUEUE_HEAD(wait);
+atomic_t errors, busy;
+int halt;
+
+void stop_write(unsigned long arg)
+{
+       halt = 1;
+}
+
+int endio(struct bio *bio, unsigned int bytes_done, int error)
+{
+       if (bio->bi_size) {
+               return 1;
+       }
+
+       if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+               atomic_inc(&errors);
+       }
+
+       if (atomic_dec_and_test(&busy)) {
+               wake_up(&wait);
+       }
+
+       return 0;
+}
+
+int do_write(struct block_device *bdev,
+            struct page *zero, unsigned long expires, int step)
+{
+       DEFINE_TIMER(timer, stop_write, expires, (unsigned long) NULL);
+       int i;
+
+       add_timer(&timer);
+
+       for (halt = i = 0; !halt; i++) {
+               struct bio *bio = bio_alloc(GFP_NOIO, 1);
+               if (bio) {
+                       atomic_inc(&busy);
+
+                       bio->bi_bdev = bdev;
+                       bio->bi_sector = step * i;
+                       bio_add_page(bio, zero, PAGE_SIZE, 0);
+                       bio->bi_end_io = endio;
+                       submit_bio((1 << BIO_RW) | (1 << BIO_RW_SYNC), bio);
+               } else {
+                       atomic_inc(&errors);
+               }
+       }
+
+       wait_event(wait, !atomic_read(&busy));
+
+       return i;
+}
+
+int write(struct block_device *bdev, int secs, int step)
+{
+       struct page *zero;
+
+       s64 time;
+       unsigned long space;
+       int cycles;
+
+       zero = alloc_page(GFP_KERNEL);
+       if (!zero) {
+               return -ENOMEM;
+       }
+
+       memset(kmap(zero), 0, PAGE_SIZE);
+       kunmap(zero);
+
+       atomic_set(&errors, 0);
+       atomic_set(&busy, 0);
+
+       START(time);
+
+       cycles = do_write(bdev, zero, jiffies + secs * HZ, step);
+
+       STOP(time);
+
+       put_page(zero);
+
+       (void) do_div(time, 1000000);
+
+       space = ((unsigned long) cycles * 1000 * (PAGE_SIZE >> 10)) >> 10;
+
+       printk("%d\t\t%d\t\t%lu\t\t%lu\t\t%lu.%-3lu\n",
+              step, cycles, space / 1000,
+              (unsigned long ) time / 1000,
+              space / (unsigned long) time,
+              space % (unsigned long) time);
+
+       return 0;
+}
+
+static int __init init(void)
+{
+       struct block_device *bdev;
+       int i, err;
+
+       bdev = open_bdev_excl("/dev/hda", 0, THIS_MODULE);
+       if (IS_ERR(bdev)) {
+               printk("device won't open!\n");
+               return PTR_ERR(bdev);
+       }
+
+       printk("STEP (hs)\tCYCLES\t\tWRITTEN (MB)\tELAPSED (s)\tSPEED (MB/s)\n");
+
+       for (i = 1; i < 16; i++) {
+               err = write(bdev, 3, i);
+               if (err < 0) {
+                       printk("%d\t-\t\t-\t\t-\t\t-\n", i);
+               }
+       }
+
+       for (; i < 1024; i <<= 1) {
+               err = write(bdev, 3, i);
+               if (err < 0) {
+                       printk("%d\t-\t\t-\t\t-\t\t-\n", i);
+               }
+       }
+
+       close_bdev_excl(bdev);
+
+       return -EIO;
+}
+
+module_init(init);
+
+MODULE_LICENSE("GPL v2");

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux