Re: [PATCH] [8/30] x86_64: Add vDSO for x86-64 with gettimeofday/clock_gettime/getcpu

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Andi Kleen wrote:
> This implements new vDSO for x86-64.  The concept is similar
> to the existing vDSOs on i386 and PPC.  x86-64 has had static
> vsyscalls before,  but these are not flexible enough anymore.
>
> A vDSO is an ELF shared library supplied by the kernel that is mapped into
> user address space.  The vDSO mapping is randomized for each process
> for security reasons.
>
> Doing this was needed for clock_gettime, because clock_gettime
> always needs a syscall fallback and having one at a fixed
> address would have made buffer overflow exploits too easy to write.
>
> The vdso can be disabled with vdso=0
>
> It currently includes a new gettimeofday implementation and optimized
> clock_gettime(). The gettimeofday implementation is slightly faster
> than the one in the old vsyscall.  clock_gettime is significantly faster 
> than the syscall for CLOCK_MONOTONIC and CLOCK_REALTIME.
>
> The new calls are generally faster than the old vsyscall. 
>
> TBD: add new benchmarks
>
> Advantages over the old x86-64 vsyscalls:
> - Extensible
> - Randomized
> - Cleaner
> - Easier to virtualize (the old static address range previously caused
> overhead e.g. for Xen because it has to create special page tables for it) 
>
> Weak points: 
> - glibc support still to be written
>
> The VM interface is partly based on Ingo Molnar's i386 version.
>
> Signed-off-by: Andi Kleen <[email protected]>
>
> ---
>  Documentation/kernel-parameters.txt |    2 
>  arch/x86_64/Makefile                |    3 
>  arch/x86_64/ia32/ia32_binfmt.c      |    1 
>  arch/x86_64/kernel/time.c           |    1 
>  arch/x86_64/kernel/vmlinux.lds.S    |   12 +++
>  arch/x86_64/kernel/vsyscall.c       |   22 +----
>  arch/x86_64/mm/init.c               |   17 ++++
>  arch/x86_64/vdso/Makefile           |   49 ++++++++++++
>  arch/x86_64/vdso/vclock_gettime.c   |  120 +++++++++++++++++++++++++++++++
>  arch/x86_64/vdso/vdso-note.S        |   25 ++++++
>  arch/x86_64/vdso/vdso-start.S       |    2 
>  arch/x86_64/vdso/vdso.S             |    2 
>  arch/x86_64/vdso/vdso.lds.S         |   77 ++++++++++++++++++++
>  arch/x86_64/vdso/vextern.h          |   16 ++++
>  arch/x86_64/vdso/vgetcpu.c          |   50 +++++++++++++
>  arch/x86_64/vdso/vma.c              |  137 ++++++++++++++++++++++++++++++++++++
>  arch/x86_64/vdso/voffset.h          |    1 
>  arch/x86_64/vdso/vvar.c             |   12 +++
>  include/asm-x86_64/auxvec.h         |    2 
>  include/asm-x86_64/elf.h            |   13 +++
>  include/asm-x86_64/mmu.h            |    1 
>  include/asm-x86_64/pgtable.h        |    8 +-
>  include/asm-x86_64/vgtod.h          |   29 +++++++
>  include/asm-x86_64/vsyscall.h       |    3 
>  24 files changed, 583 insertions(+), 22 deletions(-)
>
> Index: linux/arch/x86_64/ia32/ia32_binfmt.c
> ===================================================================
> --- linux.orig/arch/x86_64/ia32/ia32_binfmt.c
> +++ linux/arch/x86_64/ia32/ia32_binfmt.c
> @@ -38,6 +38,7 @@
>  
>  int sysctl_vsyscall32 = 1;
>  
> +#undef ARCH_DLINFO
>  #define ARCH_DLINFO do {  \
>  	if (sysctl_vsyscall32) { \
>  	NEW_AUX_ENT(AT_SYSINFO, (u32)(u64)VSYSCALL32_VSYSCALL); \
> Index: linux/arch/x86_64/kernel/vmlinux.lds.S
> ===================================================================
> --- linux.orig/arch/x86_64/kernel/vmlinux.lds.S
> +++ linux/arch/x86_64/kernel/vmlinux.lds.S
> @@ -94,6 +94,9 @@ SECTIONS
>    .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data))
>  		{ *(.vsyscall_gtod_data) }
>    vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
> +  .vsyscall_clock : AT(VLOAD(.vsyscall_clock))
> +		{ *(.vsyscall_clock) }
> +  vsyscall_clock = VVIRT(.vsyscall_clock);
>  
>  
>    .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1))
> @@ -153,6 +156,8 @@ SECTIONS
>  
>    . = ALIGN(4096);		/* Init code and data */
>    __init_begin = .;
> +
> +
>    .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
>  	_sinittext = .;
>  	*(.init.text)
> @@ -190,6 +195,12 @@ SECTIONS
>    .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) }
>    .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) }
>  
> +/* vdso blob that is mapped into user space */
> +  vdso_start = . ;
> +  .vdso  : AT(ADDR(.vdso) - LOAD_OFFSET) { *(.vdso) }
> +  . = ALIGN(4096);
> +  vdso_end = .;
> +
>  #ifdef CONFIG_BLK_DEV_INITRD
>    . = ALIGN(4096);
>    __initramfs_start = .;
> @@ -202,6 +213,7 @@ SECTIONS
>    .data.percpu  : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) }
>    __per_cpu_end = .;
>    . = ALIGN(4096);
> +
>    __init_end = .;
>  
>    . = ALIGN(4096);
> Index: linux/arch/x86_64/mm/init.c
> ===================================================================
> --- linux.orig/arch/x86_64/mm/init.c
> +++ linux/arch/x86_64/mm/init.c
> @@ -159,6 +159,14 @@ static __init void set_pte_phys(unsigned
>  	__flush_tlb_one(vaddr);
>  }
>  
> +void __init
> +set_kernel_map(void *vaddr,unsigned long len,unsigned long phys,pgprot_t prot)
> +{
> +	void *end = vaddr + ALIGN(len, PAGE_SIZE);
> +	for (; vaddr < end; vaddr += PAGE_SIZE, phys += PAGE_SIZE)
> +		set_pte_phys((unsigned long)vaddr, phys, prot);
> +}
> +
>  /* NOTE: this is meant to be run only at boot */
>  void __init 
>  __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
> @@ -756,3 +764,12 @@ int in_gate_area_no_task(unsigned long a
>  {
>  	return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
>  }
> +
> +const char *arch_vma_name(struct vm_area_struct *vma)
> +{
> +	if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
> +		return "[vdso]";
> +	if (vma == &gate_vma)
> +		return "[vsyscall]";
> +	return NULL;
> +}
> Index: linux/arch/x86_64/vdso/vdso-note.S
> ===================================================================
> --- /dev/null
> +++ linux/arch/x86_64/vdso/vdso-note.S
> @@ -0,0 +1,25 @@
> +/*
> + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
> + * Here we can supply some information useful to userland.
> + */
> +
> +#include <linux/uts.h>
> +#include <linux/version.h>
> +
> +#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type)			      \
>   

Use linux/elfnote.h?

    J
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux