Andi Kleen wrote:
> This implements new vDSO for x86-64. The concept is similar
> to the existing vDSOs on i386 and PPC. x86-64 has had static
> vsyscalls before, but these are not flexible enough anymore.
>
> A vDSO is an ELF shared library supplied by the kernel that is mapped into
> user address space. The vDSO mapping is randomized for each process
> for security reasons.
>
> Doing this was needed for clock_gettime, because clock_gettime
> always needs a syscall fallback and having one at a fixed
> address would have made buffer overflow exploits too easy to write.
>
> The vdso can be disabled with vdso=0
>
> It currently includes a new gettimeofday implementation and optimized
> clock_gettime(). The gettimeofday implementation is slightly faster
> than the one in the old vsyscall. clock_gettime is significantly faster
> than the syscall for CLOCK_MONOTONIC and CLOCK_REALTIME.
>
> The new calls are generally faster than the old vsyscall.
>
> TBD: add new benchmarks
>
> Advantages over the old x86-64 vsyscalls:
> - Extensible
> - Randomized
> - Cleaner
> - Easier to virtualize (the old static address range previously caused
> overhead e.g. for Xen because it has to create special page tables for it)
>
> Weak points:
> - glibc support still to be written
>
> The VM interface is partly based on Ingo Molnar's i386 version.
>
> Signed-off-by: Andi Kleen <[email protected]>
>
> ---
> Documentation/kernel-parameters.txt | 2
> arch/x86_64/Makefile | 3
> arch/x86_64/ia32/ia32_binfmt.c | 1
> arch/x86_64/kernel/time.c | 1
> arch/x86_64/kernel/vmlinux.lds.S | 12 +++
> arch/x86_64/kernel/vsyscall.c | 22 +----
> arch/x86_64/mm/init.c | 17 ++++
> arch/x86_64/vdso/Makefile | 49 ++++++++++++
> arch/x86_64/vdso/vclock_gettime.c | 120 +++++++++++++++++++++++++++++++
> arch/x86_64/vdso/vdso-note.S | 25 ++++++
> arch/x86_64/vdso/vdso-start.S | 2
> arch/x86_64/vdso/vdso.S | 2
> arch/x86_64/vdso/vdso.lds.S | 77 ++++++++++++++++++++
> arch/x86_64/vdso/vextern.h | 16 ++++
> arch/x86_64/vdso/vgetcpu.c | 50 +++++++++++++
> arch/x86_64/vdso/vma.c | 137 ++++++++++++++++++++++++++++++++++++
> arch/x86_64/vdso/voffset.h | 1
> arch/x86_64/vdso/vvar.c | 12 +++
> include/asm-x86_64/auxvec.h | 2
> include/asm-x86_64/elf.h | 13 +++
> include/asm-x86_64/mmu.h | 1
> include/asm-x86_64/pgtable.h | 8 +-
> include/asm-x86_64/vgtod.h | 29 +++++++
> include/asm-x86_64/vsyscall.h | 3
> 24 files changed, 583 insertions(+), 22 deletions(-)
>
> Index: linux/arch/x86_64/ia32/ia32_binfmt.c
> ===================================================================
> --- linux.orig/arch/x86_64/ia32/ia32_binfmt.c
> +++ linux/arch/x86_64/ia32/ia32_binfmt.c
> @@ -38,6 +38,7 @@
>
> int sysctl_vsyscall32 = 1;
>
> +#undef ARCH_DLINFO
> #define ARCH_DLINFO do { \
> if (sysctl_vsyscall32) { \
> NEW_AUX_ENT(AT_SYSINFO, (u32)(u64)VSYSCALL32_VSYSCALL); \
> Index: linux/arch/x86_64/kernel/vmlinux.lds.S
> ===================================================================
> --- linux.orig/arch/x86_64/kernel/vmlinux.lds.S
> +++ linux/arch/x86_64/kernel/vmlinux.lds.S
> @@ -94,6 +94,9 @@ SECTIONS
> .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data))
> { *(.vsyscall_gtod_data) }
> vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
> + .vsyscall_clock : AT(VLOAD(.vsyscall_clock))
> + { *(.vsyscall_clock) }
> + vsyscall_clock = VVIRT(.vsyscall_clock);
>
>
> .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1))
> @@ -153,6 +156,8 @@ SECTIONS
>
> . = ALIGN(4096); /* Init code and data */
> __init_begin = .;
> +
> +
> .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
> _sinittext = .;
> *(.init.text)
> @@ -190,6 +195,12 @@ SECTIONS
> .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) }
> .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) }
>
> +/* vdso blob that is mapped into user space */
> + vdso_start = . ;
> + .vdso : AT(ADDR(.vdso) - LOAD_OFFSET) { *(.vdso) }
> + . = ALIGN(4096);
> + vdso_end = .;
> +
> #ifdef CONFIG_BLK_DEV_INITRD
> . = ALIGN(4096);
> __initramfs_start = .;
> @@ -202,6 +213,7 @@ SECTIONS
> .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) }
> __per_cpu_end = .;
> . = ALIGN(4096);
> +
> __init_end = .;
>
> . = ALIGN(4096);
> Index: linux/arch/x86_64/mm/init.c
> ===================================================================
> --- linux.orig/arch/x86_64/mm/init.c
> +++ linux/arch/x86_64/mm/init.c
> @@ -159,6 +159,14 @@ static __init void set_pte_phys(unsigned
> __flush_tlb_one(vaddr);
> }
>
> +void __init
> +set_kernel_map(void *vaddr,unsigned long len,unsigned long phys,pgprot_t prot)
> +{
> + void *end = vaddr + ALIGN(len, PAGE_SIZE);
> + for (; vaddr < end; vaddr += PAGE_SIZE, phys += PAGE_SIZE)
> + set_pte_phys((unsigned long)vaddr, phys, prot);
> +}
> +
> /* NOTE: this is meant to be run only at boot */
> void __init
> __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
> @@ -756,3 +764,12 @@ int in_gate_area_no_task(unsigned long a
> {
> return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
> }
> +
> +const char *arch_vma_name(struct vm_area_struct *vma)
> +{
> + if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
> + return "[vdso]";
> + if (vma == &gate_vma)
> + return "[vsyscall]";
> + return NULL;
> +}
> Index: linux/arch/x86_64/vdso/vdso-note.S
> ===================================================================
> --- /dev/null
> +++ linux/arch/x86_64/vdso/vdso-note.S
> @@ -0,0 +1,25 @@
> +/*
> + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
> + * Here we can supply some information useful to userland.
> + */
> +
> +#include <linux/uts.h>
> +#include <linux/version.h>
> +
> +#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type) \
>
Use linux/elfnote.h?
J
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
[Index of Archives]
[Kernel Newbies]
[Netfilter]
[Bugtraq]
[Photo]
[Stuff]
[Gimp]
[Yosemite News]
[MIPS Linux]
[ARM Linux]
[Linux Security]
[Linux RAID]
[Video 4 Linux]
[Linux for the blind]
[Linux Resources]