Re: [RFC] [PATCH] cache pollution aware __copy_from_user_ll()


 



Hi,

BTW, what are you going to do about the page faults that may happen
during __copy_user_zeroing_nocache()? The current process may be blocked
in the fault handler for a while and have its FPU registers polluted;
kernel_fpu_begin() won't help in that case. This is another issue, though.
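
For reference, this is the constraint I mean; a minimal sketch (not the
patch code) of the kernel_fpu_begin()/kernel_fpu_end() bracket around an
MMX copy. On 2.6.12/i386 the bracket only saves the current task's
FPU/MMX state and disables preemption, so nothing inside it may fault
and block:

	#include <linux/types.h>
	#include <asm/i387.h>

	/* Sketch only: the shape of an MMX copy under the FPU bracket. */
	static void mmx_copy_sketch(void *to, const void *from, size_t len)
	{
		kernel_fpu_begin();	/* save user FPU/MMX state, preempt_disable() */
		/*
		 * The movq/movntq loop would run here.  A load from the
		 * user buffer that faults and blocks in the fault handler
		 * does so with the MMX registers live and preemption off --
		 * exactly the case above.
		 */
		kernel_fpu_end();	/* stts(), preempt_enable() */
	}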

> > Thanks.
> > 
> > filemap_copy_from_user() calls __copy_from_user_inatomic() calls
> > __copy_from_user_ll().
> > 
> > I'll look at the code.
> 
> The following is a quick hack of a cache-aware implementation of
> __copy_from_user_ll() and __copy_from_user_inatomic():
> 
> __copy_from_user_ll_nocache() and __copy_from_user_inatomic_nocache()
> 
> filemap_copy_from_user() calls __copy_from_user_inatomic_nocache()
> instead of __copy_from_user_inatomic(), which reduces cache misses.
> 
> The first column is the cache reference count (memory accesses) and the
> third column is the 3rd-level cache miss count.
> 
> The following example shows that L3 cache misses in the copy routine are reduced from 37410 to 107.
> 
> 2.6.12.4 nocache version
> Counted BSQ_CACHE_REFERENCE events (cache references seen by the bus unit) with a unit mask of 0x3f (multiple flags) count 3000
> Counted BSQ_CACHE_REFERENCE events (cache references seen by the bus unit) with a unit mask of 0x200 (read 3rd level cache miss) count 3000
> samples  %        samples  %     app name       symbol name
> 120442    6.4106  107    0.5620  vmlinux        __copy_user_zeroing_nocache
> 80049     4.2606  578    3.0357  vmlinux        journal_add_journal_head
> 69194     3.6829  154    0.8088  vmlinux        journal_dirty_metadata
> 67059     3.5692  78     0.4097  vmlinux        __find_get_block
> 64145     3.4141  32     0.1681  vmlinux        journal_put_journal_head
> pattern9-0-cpu4-0-08161154/summary.out
> 
> The original 2.6.12.4 version:
> Counted BSQ_CACHE_REFERENCE events (cache references seen by the bus unit) with a unit mask of 0x3f (multiple flags) count 3000
> Counted BSQ_CACHE_REFERENCE events (cache references seen by the bus unit) with a unit mask of 0x200 (read 3rd level cache miss) count 3000
> samples  %        samples  %     app name       symbol name
> 120646    7.4680  37410 62.3355  vmlinux        __copy_from_user_ll
> 79508     4.9215  903    1.5046  vmlinux        _spin_lock
> 65526     4.0561  873    1.4547  vmlinux        journal_add_journal_head
> 59296     3.6704  129    0.2149  vmlinux        __find_get_block
> 58647     3.6302  215    0.3582  vmlinux        journal_dirty_metadata
> 
> What do you think?
> 
> Hiro
> 
> diff -ur linux-2.6.12.4.orig/Makefile linux-2.6.12.4.nocache/Makefile
> --- linux-2.6.12.4.orig/Makefile	2005-08-12 14:37:59.000000000 +0900
> +++ linux-2.6.12.4.nocache/Makefile	2005-08-16 10:22:31.000000000 +0900
> @@ -1,7 +1,7 @@
>  VERSION = 2
>  PATCHLEVEL = 6
>  SUBLEVEL = 12
> -EXTRAVERSION = .4.orig
> +EXTRAVERSION = .4.nocache
>  NAME=Woozy Numbat
>  
>  # *DOCUMENTATION*
> diff -ur linux-2.6.12.4.orig/arch/i386/lib/usercopy.c linux-2.6.12.4.nocache/arch/i386/lib/usercopy.c
> --- linux-2.6.12.4.orig/arch/i386/lib/usercopy.c	2005-08-05 16:04:37.000000000 +0900
> +++ linux-2.6.12.4.nocache/arch/i386/lib/usercopy.c	2005-08-16 10:49:59.000000000 +0900
> @@ -10,6 +10,7 @@
>  #include <linux/highmem.h>
>  #include <linux/blkdev.h>
>  #include <linux/module.h>
> +#include <asm/i387.h>
>  #include <asm/uaccess.h>
>  #include <asm/mmx.h>
>  
> @@ -511,6 +512,110 @@
>  		: "memory");						\
>  } while (0)
>  
> +/* Non Temporal Hint version of mmx_memcpy */
> +/* It is cache aware                       */
> +/* [email protected]               */
> +static unsigned long 
> +__copy_user_zeroing_nocache(void *to, const void *from, size_t len)
> +{
> +        /* Note! gcc doesn't seem to align stack variables properly, so we
> +         * need to make use of unaligned loads and stores.
> +         */
> +	void *p;
> +	int i;
> +
> +	if (unlikely(in_interrupt())){
> +	        __copy_user_zeroing(to, from, len);
> +		return len;
> +	}
> +
> +	p = to;
> +	i = len >> 6; /* len/64 */
> +
> +        kernel_fpu_begin();
> +
> +	__asm__ __volatile__ (
> +		"1: prefetchnta (%0)\n"		/* This set is 28 bytes */
> +		"   prefetchnta 64(%0)\n"
> +		"   prefetchnta 128(%0)\n"
> +		"   prefetchnta 192(%0)\n"
> +		"   prefetchnta 256(%0)\n"
> +		"2:  \n"
> +		".section .fixup, \"ax\"\n"
> +		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
> +		"   jmp 2b\n"
> +		".previous\n"
> +		".section __ex_table,\"a\"\n"
> +		"	.align 4\n"
> +		"	.long 1b, 3b\n"
> +		".previous"
> +		: : "r" (from) );
> +		
> +	for(; i>5; i--)
> +	{
> +		__asm__ __volatile__ (
> +		"1:  prefetchnta 320(%0)\n"
> +		"2:  movq (%0), %%mm0\n"
> +		"  movq 8(%0), %%mm1\n"
> +		"  movq 16(%0), %%mm2\n"
> +		"  movq 24(%0), %%mm3\n"
> +		"  movntq %%mm0, (%1)\n"
> +		"  movntq %%mm1, 8(%1)\n"
> +		"  movntq %%mm2, 16(%1)\n"
> +		"  movntq %%mm3, 24(%1)\n"
> +		"  movq 32(%0), %%mm0\n"
> +		"  movq 40(%0), %%mm1\n"
> +		"  movq 48(%0), %%mm2\n"
> +		"  movq 56(%0), %%mm3\n"
> +		"  movntq %%mm0, 32(%1)\n"
> +		"  movntq %%mm1, 40(%1)\n"
> +		"  movntq %%mm2, 48(%1)\n"
> +		"  movntq %%mm3, 56(%1)\n"
> +		".section .fixup, \"ax\"\n"
> +		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
> +		"   jmp 2b\n"
> +		".previous\n"
> +		".section __ex_table,\"a\"\n"
> +		"	.align 4\n"
> +		"	.long 1b, 3b\n"
> +		".previous"
> +		: : "r" (from), "r" (to) : "memory");
> +		from+=64;
> +		to+=64;
> +	}
> +
> +	for(; i>0; i--)
> +	{
> +		__asm__ __volatile__ (
> +		"  movq (%0), %%mm0\n"
> +		"  movq 8(%0), %%mm1\n"
> +		"  movq 16(%0), %%mm2\n"
> +		"  movq 24(%0), %%mm3\n"
> +		"  movntq %%mm0, (%1)\n"
> +		"  movntq %%mm1, 8(%1)\n"
> +		"  movntq %%mm2, 16(%1)\n"
> +		"  movntq %%mm3, 24(%1)\n"
> +		"  movq 32(%0), %%mm0\n"
> +		"  movq 40(%0), %%mm1\n"
> +		"  movq 48(%0), %%mm2\n"
> +		"  movq 56(%0), %%mm3\n"
> +		"  movntq %%mm0, 32(%1)\n"
> +		"  movntq %%mm1, 40(%1)\n"
> +		"  movntq %%mm2, 48(%1)\n"
> +		"  movntq %%mm3, 56(%1)\n"
> +		: : "r" (from), "r" (to) : "memory");
> +		from+=64;
> +		to+=64;
> +	}
> +	/*
> +	 *	Now do the tail of the block
> +	 */
> +	kernel_fpu_end();
> +	if(i=(len&63))
> +	  __copy_user_zeroing(to, from, i);
> +	return i;
> +}
> +
>  
>  unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long n)
>  {
> @@ -582,6 +687,21 @@
>  	return n;
>  }
>  
> +unsigned long
> +__copy_from_user_ll_nocache(void *to, const void __user *from, unsigned long n)
> +{
> +	BUG_ON((long)n < 0);
> +        if (n < 512) {
> +          if (movsl_is_ok(to, from, n))
> +                __copy_user_zeroing(to, from, n);
> +          else
> +                n = __copy_user_zeroing_intel(to, from, n);
> +        }
> +        else
> +          n = __copy_user_zeroing_nocache(to, from, n);
> +	return n;
> +}
> +
>  /**
>   * copy_to_user: - Copy a block of data into user space.
>   * @to:   Destination address, in user space.
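
The movntq/prefetchnta combination above is what keeps the copy out of
the cache: prefetchnta pulls the source in with a non-temporal hint and
movntq streams the stores through the write-combining buffers without
allocating destination cache lines. For readers who prefer C, here is a
rough user-space illustration of the same 64-byte loop using the SSE
intrinsics (illustration only -- the function name and the omitted tail
handling are mine, not part of the patch):

	#include <stddef.h>
	#include <mmintrin.h>
	#include <xmmintrin.h>	/* _mm_stream_pi (movntq), _mm_prefetch, _mm_sfence */

	/* Copy len bytes (assumed here to be a multiple of 64) with
	 * non-temporal stores, so the destination never displaces useful
	 * data from the caches. */
	static void nt_copy64(void *to, const void *from, size_t len)
	{
		__m64 *d = to;
		const __m64 *s = from;
		size_t i, blocks = len / 64;

		for (i = 0; i < blocks; i++, d += 8, s += 8) {
			/* prefetchnta 320(%0) in the patch */
			_mm_prefetch((const char *)(s + 40), _MM_HINT_NTA);
			_mm_stream_pi(d + 0, s[0]);	/* movntq: bypass the cache */
			_mm_stream_pi(d + 1, s[1]);
			_mm_stream_pi(d + 2, s[2]);
			_mm_stream_pi(d + 3, s[3]);
			_mm_stream_pi(d + 4, s[4]);
			_mm_stream_pi(d + 5, s[5]);
			_mm_stream_pi(d + 6, s[6]);
			_mm_stream_pi(d + 7, s[7]);
		}
		_mm_sfence();	/* order the streaming stores */
		_mm_empty();	/* emms -- hand the FPU back */
	}

The n < 512 cut-off in __copy_from_user_ll_nocache() looks reasonable:
for short copies the FPU save/restore overhead would presumably outweigh
the cache benefit.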
> diff -ur linux-2.6.12.4.orig/include/asm/uaccess.h linux-2.6.12.4.nocache/include/asm/uaccess.h
> --- linux-2.6.12.4.orig/include/asm/uaccess.h	2005-08-05 16:04:37.000000000 +0900
> +++ linux-2.6.12.4.nocache/include/asm/uaccess.h	2005-08-16 10:44:05.000000000 +0900
> @@ -413,6 +413,8 @@
>  				const void *from, unsigned long n);
>  unsigned long __must_check __copy_from_user_ll(void *to,
>  				const void __user *from, unsigned long n);
> +unsigned long __must_check __copy_from_user_ll_nocache(void *to,
> +				const void __user *from, unsigned long n);
>  
>  /*
>   * Here we special-case 1, 2 and 4-byte copy_*_user invocations.  On a fault
> @@ -502,11 +504,38 @@
>  }
>  
>  static inline unsigned long
> +__copy_from_user_inatomic_nocache(void *to, const void __user *from, unsigned long n)
> +{
> +	if (__builtin_constant_p(n)) {
> +		unsigned long ret;
> +
> +		switch (n) {
> +		case 1:
> +			__get_user_size(*(u8 *)to, from, 1, ret, 1);
> +			return ret;
> +		case 2:
> +			__get_user_size(*(u16 *)to, from, 2, ret, 2);
> +			return ret;
> +		case 4:
> +			__get_user_size(*(u32 *)to, from, 4, ret, 4);
> +			return ret;
> +		}
> +	}
> +	return __copy_from_user_ll_nocache(to, from, n);
> +}
> +
> +static inline unsigned long
>  __copy_from_user(void *to, const void __user *from, unsigned long n)
>  {
>         might_sleep();
>         return __copy_from_user_inatomic(to, from, n);
>  }
> +static inline unsigned long
> +__copy_from_user_nocache(void *to, const void __user *from, unsigned long n)
> +{
> +       might_sleep();
> +       return __copy_from_user_inatomic_nocache(to, from, n);
> +}
>  unsigned long __must_check copy_to_user(void __user *to,
>  				const void *from, unsigned long n);
>  unsigned long __must_check copy_from_user(void *to,
> diff -ur linux-2.6.12.4.orig/include/asm-i386/uaccess.h linux-2.6.12.4.nocache/include/asm-i386/uaccess.h
> --- linux-2.6.12.4.orig/include/asm-i386/uaccess.h	2005-08-05 16:04:37.000000000 +0900
> +++ linux-2.6.12.4.nocache/include/asm-i386/uaccess.h	2005-08-16 10:44:05.000000000 +0900
> @@ -413,6 +413,8 @@
>  				const void *from, unsigned long n);
>  unsigned long __must_check __copy_from_user_ll(void *to,
>  				const void __user *from, unsigned long n);
> +unsigned long __must_check __copy_from_user_ll_nocache(void *to,
> +				const void __user *from, unsigned long n);
>  
>  /*
>   * Here we special-case 1, 2 and 4-byte copy_*_user invocations.  On a fault
> @@ -502,11 +504,38 @@
>  }
>  
>  static inline unsigned long
> +__copy_from_user_inatomic_nocache(void *to, const void __user *from, unsigned long n)
> +{
> +	if (__builtin_constant_p(n)) {
> +		unsigned long ret;
> +
> +		switch (n) {
> +		case 1:
> +			__get_user_size(*(u8 *)to, from, 1, ret, 1);
> +			return ret;
> +		case 2:
> +			__get_user_size(*(u16 *)to, from, 2, ret, 2);
> +			return ret;
> +		case 4:
> +			__get_user_size(*(u32 *)to, from, 4, ret, 4);
> +			return ret;
> +		}
> +	}
> +	return __copy_from_user_ll_nocache(to, from, n);
> +}
> +
> +static inline unsigned long
>  __copy_from_user(void *to, const void __user *from, unsigned long n)
>  {
>         might_sleep();
>         return __copy_from_user_inatomic(to, from, n);
>  }
> +static inline unsigned long
> +__copy_from_user_nocache(void *to, const void __user *from, unsigned long n)
> +{
> +       might_sleep();
> +       return __copy_from_user_inatomic_nocache(to, from, n);
> +}
>  unsigned long __must_check copy_to_user(void __user *to,
>  				const void *from, unsigned long n);
>  unsigned long __must_check copy_from_user(void *to,
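
The constant-size switch in __copy_from_user_inatomic_nocache() mirrors
the existing __copy_from_user_inatomic(): when n is a compile-time
constant 1, 2 or 4, gcc folds the __builtin_constant_p() test and the
call collapses to a single __get_user_size(), never reaching the
_nocache path. A hypothetical caller, just to illustrate what the
compiler sees:

	/* Hypothetical example (not in the patch): sizeof(*dst) is a
	 * compile-time constant, so the inline switch selects the 4-byte
	 * __get_user_size() case and no out-of-line copy is emitted. */
	static inline unsigned long get_user_u32(u32 *dst, const u32 __user *src)
	{
		return __copy_from_user_inatomic_nocache(dst, src, sizeof(*dst));
	}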
> diff -ur linux-2.6.12.4.orig/include/linux/autoconf.h linux-2.6.12.4.nocache/include/linux/autoconf.h
> --- linux-2.6.12.4.orig/include/linux/autoconf.h	2005-08-15 16:53:01.000000000 +0900
> +++ linux-2.6.12.4.nocache/include/linux/autoconf.h	2005-08-16 10:32:33.000000000 +0900
> @@ -1,7 +1,7 @@
>  /*
>   * Automatically generated C config: don't edit
> - * Linux kernel version: 2.6.12.4.orig
> - * Mon Aug 15 16:53:01 2005
> + * Linux kernel version: 2.6.12.4.nocache
> + * Tue Aug 16 10:32:33 2005
>   */
>  #define AUTOCONF_INCLUDED
>  #define CONFIG_X86 1
> diff -ur linux-2.6.12.4.orig/mm/filemap.c linux-2.6.12.4.nocache/mm/filemap.c
> --- linux-2.6.12.4.orig/mm/filemap.c	2005-08-05 16:04:37.000000000 +0900
> +++ linux-2.6.12.4.nocache/mm/filemap.c	2005-08-16 10:16:06.000000000 +0900
> @@ -1727,13 +1727,13 @@
>  	int left;
>  
>  	kaddr = kmap_atomic(page, KM_USER0);
> -	left = __copy_from_user_inatomic(kaddr + offset, buf, bytes);
> +	left = __copy_from_user_inatomic_nocache(kaddr + offset, buf, bytes);
>  	kunmap_atomic(kaddr, KM_USER0);
>  
>  	if (left != 0) {
>  		/* Do it the slow way */
>  		kaddr = kmap(page);
> -		left = __copy_from_user(kaddr + offset, buf, bytes);
> +		left = __copy_from_user_nocache(kaddr + offset, buf, bytes);
>  		kunmap(page);
>  	}
>  	return bytes - left;
> @@ -1750,7 +1750,7 @@
>  		int copy = min(bytes, iov->iov_len - base);
>  
>  		base = 0;
> -		left = __copy_from_user_inatomic(vaddr, buf, copy);
> +		left = __copy_from_user_inatomic_nocache(vaddr, buf, copy);
>  		copied += copy;
>  		bytes -= copy;
>  		vaddr += copy;
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
