Re: [PATCH] include/linux/etherdevice.h, kernel 2.6.14

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sat, Oct 29, 2005 at 04:10:46PM +0200, Michal Srajer wrote:
> Description: Very small optimization patch for include/linux/etherdevice.h in 2.6.14 kernel.

How is this an optimisation?

/* u8: unsigned byte type used for the address bytes in the functions below. */
typedef unsigned char u8;
 
/*
 * is_zero_ether_addr1 - test whether a six-byte address is all zeroes.
 * @addr: pointer to the address; bytes addr[0]..addr[5] are read.
 *
 * Bitwise-OR variant: all six bytes are always read and OR-ed together
 * (no short-circuit evaluation), then the combined value is negated.
 *
 * Returns 1 if every byte is zero, 0 otherwise.
 */
static int is_zero_ether_addr1(const u8 *addr)
{
       return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
}
 
/*
 * is_zero_ether_addr2 - test whether a six-byte address is all zeroes.
 * @addr: pointer to the address; up to bytes addr[0]..addr[5] are read.
 *
 * Logical-OR variant: `||` short-circuits, so evaluation stops at the
 * first non-zero byte and later bytes are not read in that case.
 *
 * Returns 1 if every byte is zero, 0 otherwise.
 */
static int is_zero_ether_addr2(const u8 *addr)
{
       return !(addr[0] || addr[1] || addr[2] || addr[3] || addr[4] || addr[5]);
}

produces on x86:

# x86 (32-bit, AT&T syntax) listing for the bitwise-OR variant.
# In:  addr is read from the stack slot at 8(%ebp).
# Out: %eax = 1 if all six bytes are zero, else 0.
# Straight-line code: six byte accesses folded into %al, no branches.
is_zero_ether_addr1:
        # Set up the frame pointer.
        pushl   %ebp
        movl    %esp, %ebp
        # edx = addr
        movl    8(%ebp), %edx
        # al = addr[1], then OR in addr[0], addr[2] .. addr[5].
        movb    1(%edx), %al
        orb     (%edx), %al
        orb     2(%edx), %al
        orb     3(%edx), %al
        orb     4(%edx), %al
        orb     5(%edx), %al
        # ZF comes from the last orb: set only when the combined OR is zero.
        sete    %al
        # Widen the 0/1 byte to the int return value.
        movzbl  %al, %eax
        # Tear down the frame and return.
        leave
        ret

# x86 (32-bit, AT&T syntax) listing for the logical-OR (short-circuit) variant.
# In:  addr is read from the stack slot at 8(%ebp).
# Out: %eax = 1 if all six bytes are zero, else 0.
# One compare-and-branch per byte; the first non-zero byte exits early.
is_zero_ether_addr2:
        # Set up the frame pointer.
        pushl   %ebp
        movl    %esp, %ebp
        # edx = addr
        movl    8(%ebp), %edx
        # Preload the "not all zero" result (0) used by every early exit.
        xorl    %eax, %eax
        # Test addr[0] .. addr[5] in order; jump to the exit on the first
        # non-zero byte, leaving %eax = 0.
        cmpb    $0, (%edx)
        jne     .L3
        cmpb    $0, 1(%edx)
        jne     .L3
        cmpb    $0, 2(%edx)
        jne     .L3
        cmpb    $0, 3(%edx)
        jne     .L3
        cmpb    $0, 4(%edx)
        jne     .L3
        cmpb    $0, 5(%edx)
        jne     .L3
        # Fell through all six tests: every byte was zero.
        movl    $1, %eax
.L3:
        # Common exit: tear down the frame and return.
        leave
        ret

and on ARM:

@ ARM listing for the bitwise-OR variant.
@ In:  r0 = addr (base register for every ldrb below).
@ Out: r0 = 1 if all six bytes are zero, else 0.
@ Loads and ORs are interleaved: each orr only consumes registers that were
@ loaded at least two instructions earlier, so no orr directly follows the
@ load it depends on. There are no branches.
is_zero_ether_addr1:
        ldrb    r1, [r0, #1]    @ zero_extendqisi2
        ldrb    r3, [r0, #0]    @ zero_extendqisi2
        ldrb    r2, [r0, #2]    @ zero_extendqisi2
        @ r3 = addr[0] | addr[1]
        orr     r3, r3, r1
        ldrb    r1, [r0, #3]    @ zero_extendqisi2
        @ r2 = addr[2] | r3
        orr     r2, r2, r3
        ldrb    r3, [r0, #4]    @ zero_extendqisi2
        @ r1 = addr[3] | r2
        orr     r1, r1, r2
        ldrb    r2, [r0, #5]    @ zero_extendqisi2
        @ r3 = addr[4] | r1
        orr     r3, r3, r1
        @ Final OR with addr[5]; the "s" suffix updates the flags so Z
        @ reflects whether the OR of all six bytes is zero.
        orrs    r2, r2, r3
        @ Conditionally select the result: 0 if non-zero, 1 if zero.
        movne   r0, #0
        moveq   r0, #1
        @ Return to the caller.
        mov     pc, lr

@ ARM listing for the logical-OR (short-circuit) variant.
@ In:  r0 = addr (base register for every ldrb below).
@ Out: r0 = 1 if all six bytes are zero, else 0.
@ r2 holds 0 throughout the tests: it is both the value each byte is compared
@ against and the result returned on an early exit.
@ Every cmp uses r3 straight after the ldrb that loads it, i.e. a load-use
@ dependency on each of the six tests.
is_zero_ether_addr2:
        ldrb    r3, [r0, #0]    @ zero_extendqisi2
        mov     r2, #0
        @ addr[0] != 0 -> exit with r2 == 0
        cmp     r3, r2
        bne     .L3
        ldrb    r3, [r0, #1]    @ zero_extendqisi2
        @ addr[1] != 0 -> exit with r2 == 0
        cmp     r3, r2
        bne     .L3
        ldrb    r3, [r0, #2]    @ zero_extendqisi2
        @ addr[2] != 0 -> exit with r2 == 0
        cmp     r3, r2
        bne     .L3
        ldrb    r3, [r0, #3]    @ zero_extendqisi2
        @ addr[3] != 0 -> exit with r2 == 0
        cmp     r3, r2
        bne     .L3
        ldrb    r3, [r0, #4]    @ zero_extendqisi2
        @ addr[4] != 0 -> exit with r2 == 0
        cmp     r3, r2
        bne     .L3
        ldrb    r3, [r0, #5]    @ zero_extendqisi2
        @ Last byte: no branch, the result is selected conditionally instead.
        cmp     r3, r2
        movne   r2, #0
        moveq   r2, #1
.L3:
        @ Common exit: move the result into r0 and return.
        mov     r0, r2
        mov     pc, lr

The former looks far more optimised in both cases.  In fact, the
latter on ARM is many times less efficient due to the LDR result
delays being incurred for every test.

-- 
Russell King
 Linux kernel    2.6 ARM Linux   - http://www.arm.linux.org.uk/
 maintainer of:  2.6 Serial core
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux