Hi,
$subject says pretty much all, this is the x86_64 version of the smp
alternatives patch. Detailed description and patch are attached below.
cheers,
Gerd
This is the x86_64 version of the smp alternative patch.
Changes are largely identical to the i386 version:
* alternative #define are moved to the new alternative.h file.
* one new elf section with pointers to the lock prefixes which can be
nop'ed out for non-smp.
* two new elf sections simliar to the "classic" alternatives to
replace SMP code with simpler UP code.
* fixup headers to use alternative.h instead of defining their own
LOCK / LOCK_PREFIX macros.
The patch reuses the i386 version of the alternatives code to avoid code
duplication. The code in alternatives.c was shuffled around a bit to
reduce the number of #ifdefs needed. It also got some tweaks needed for
x86_64 (vsyscall page handling) and new features (noreplacement option
which was x86_64 only up to now). Debug printk's are changed from
compile-time to runtime.
Loosely based on a early version from Bastian Blank <[email protected]>
Signed-off-by: Gerd Hoffmann <[email protected]>
Index: vanilla-2.6.17-rc1/arch/x86_64/kernel/Makefile
===================================================================
--- vanilla-2.6.17-rc1.orig/arch/x86_64/kernel/Makefile 2006-03-20 06:53:29.000000000 +0100
+++ vanilla-2.6.17-rc1/arch/x86_64/kernel/Makefile 2006-04-11 16:01:46.000000000 +0200
@@ -8,7 +8,7 @@ obj-y := process.o signal.o entry.o trap
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
x8664_ksyms.o i387.o syscall.o vsyscall.o \
setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
- dmi_scan.o pci-dma.o pci-nommu.o
+ dmi_scan.o pci-dma.o pci-nommu.o alternative.o
obj-$(CONFIG_X86_MCE) += mce.o
obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
@@ -50,4 +50,5 @@ quirks-y += ../../i386/kernel/quirks.o
i8237-y += ../../i386/kernel/i8237.o
msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o
dmi_scan-y += ../../i386/kernel/dmi_scan.o
+alternative-y += ../../i386/kernel/alternative.o
Index: vanilla-2.6.17-rc1/arch/x86_64/kernel/module.c
===================================================================
--- vanilla-2.6.17-rc1.orig/arch/x86_64/kernel/module.c 2006-03-20 06:53:29.000000000 +0100
+++ vanilla-2.6.17-rc1/arch/x86_64/kernel/module.c 2006-04-11 14:47:47.000000000 +0200
@@ -145,26 +145,38 @@ int apply_relocate(Elf_Shdr *sechdrs,
return -ENOSYS;
}
-extern void apply_alternatives(void *start, void *end);
-
int module_finalize(const Elf_Ehdr *hdr,
- const Elf_Shdr *sechdrs,
- struct module *me)
+ const Elf_Shdr *sechdrs,
+ struct module *me)
{
- const Elf_Shdr *s;
+ const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL;
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
- /* look for .altinstructions to patch */
- for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
- void *seg;
- if (strcmp(".altinstructions", secstrings + s->sh_name))
- continue;
- seg = (void *)s->sh_addr;
- apply_alternatives(seg, seg + s->sh_size);
- }
+ for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
+ if (!strcmp(".text", secstrings + s->sh_name))
+ text = s;
+ if (!strcmp(".altinstructions", secstrings + s->sh_name))
+ alt = s;
+ if (!strcmp(".smp_locks", secstrings + s->sh_name))
+ locks= s;
+ }
+
+ if (alt) {
+ /* patch .altinstructions */
+ void *aseg = (void *)alt->sh_addr;
+ apply_alternatives(aseg, aseg + alt->sh_size);
+ }
+ if (locks && text) {
+ void *lseg = (void *)locks->sh_addr;
+ void *tseg = (void *)text->sh_addr;
+ alternatives_smp_module_add(me, me->name,
+ lseg, lseg + locks->sh_size,
+ tseg, tseg + text->sh_size);
+ }
return 0;
}
void module_arch_cleanup(struct module *mod)
{
+ alternatives_smp_module_del(mod);
}
Index: vanilla-2.6.17-rc1/arch/x86_64/kernel/setup.c
===================================================================
--- vanilla-2.6.17-rc1.orig/arch/x86_64/kernel/setup.c 2006-04-11 14:04:05.000000000 +0200
+++ vanilla-2.6.17-rc1/arch/x86_64/kernel/setup.c 2006-04-11 17:09:18.000000000 +0200
@@ -471,80 +471,6 @@ contig_initmem_init(unsigned long start_
}
#endif
-/* Use inline assembly to define this because the nops are defined
- as inline assembly strings in the include files and we cannot
- get them easily into strings. */
-asm("\t.data\nk8nops: "
- K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
- K8_NOP7 K8_NOP8);
-
-extern unsigned char k8nops[];
-static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
- NULL,
- k8nops,
- k8nops + 1,
- k8nops + 1 + 2,
- k8nops + 1 + 2 + 3,
- k8nops + 1 + 2 + 3 + 4,
- k8nops + 1 + 2 + 3 + 4 + 5,
- k8nops + 1 + 2 + 3 + 4 + 5 + 6,
- k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-};
-
-extern char __vsyscall_0;
-
-/* Replace instructions with better alternatives for this CPU type.
-
- This runs before SMP is initialized to avoid SMP problems with
- self modifying code. This implies that assymetric systems where
- APs have less capabilities than the boot processor are not handled.
- In this case boot with "noreplacement". */
-void apply_alternatives(void *start, void *end)
-{
- struct alt_instr *a;
- int diff, i, k;
- for (a = start; (void *)a < end; a++) {
- u8 *instr;
-
- if (!boot_cpu_has(a->cpuid))
- continue;
-
- BUG_ON(a->replacementlen > a->instrlen);
- instr = a->instr;
- /* vsyscall code is not mapped yet. resolve it manually. */
- if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END)
- instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
- __inline_memcpy(instr, a->replacement, a->replacementlen);
- diff = a->instrlen - a->replacementlen;
-
- /* Pad the rest with nops */
- for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
- k = diff;
- if (k > ASM_NOP_MAX)
- k = ASM_NOP_MAX;
- __inline_memcpy(instr + i, k8_nops[k], k);
- }
- }
-}
-
-static int no_replacement __initdata = 0;
-
-void __init alternative_instructions(void)
-{
- extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
- if (no_replacement)
- return;
- apply_alternatives(__alt_instructions, __alt_instructions_end);
-}
-
-static int __init noreplacement_setup(char *s)
-{
- no_replacement = 1;
- return 1;
-}
-
-__setup("noreplacement", noreplacement_setup);
-
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
struct edd edd;
#ifdef CONFIG_EDD_MODULE
@@ -1273,7 +1199,7 @@ static int show_cpuinfo(struct seq_file
/* Other (Linux-defined) */
"cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL,
- "constant_tsc", NULL, NULL,
+ "constant_tsc", NULL, "up",
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
Index: vanilla-2.6.17-rc1/arch/x86_64/kernel/smpboot.c
===================================================================
--- vanilla-2.6.17-rc1.orig/arch/x86_64/kernel/smpboot.c 2006-04-11 14:04:05.000000000 +0200
+++ vanilla-2.6.17-rc1/arch/x86_64/kernel/smpboot.c 2006-04-11 14:47:47.000000000 +0200
@@ -797,6 +797,8 @@ static int __cpuinit do_boot_cpu(int cpu
}
+ alternatives_smp_switch(1);
+
c_idle.idle = get_idle_for_cpu(cpu);
if (c_idle.idle) {
@@ -1259,6 +1261,8 @@ void __cpu_die(unsigned int cpu)
/* They ack this in play_dead by setting CPU_DEAD */
if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
printk ("CPU %d is now offline\n", cpu);
+ if (1 == num_online_cpus())
+ alternatives_smp_switch(0);
return;
}
msleep(100);
Index: vanilla-2.6.17-rc1/arch/x86_64/kernel/vmlinux.lds.S
===================================================================
--- vanilla-2.6.17-rc1.orig/arch/x86_64/kernel/vmlinux.lds.S 2006-04-11 14:04:05.000000000 +0200
+++ vanilla-2.6.17-rc1/arch/x86_64/kernel/vmlinux.lds.S 2006-04-11 14:47:47.000000000 +0200
@@ -131,6 +131,26 @@ SECTIONS
*(.data.page_aligned)
}
+ /* might get freed after init */
+ . = ALIGN(4096);
+ __smp_alt_begin = .;
+ __smp_alt_instructions = .;
+ .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) {
+ *(.smp_altinstructions)
+ }
+ __smp_alt_instructions_end = .;
+ . = ALIGN(8);
+ __smp_locks = .;
+ .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
+ *(.smp_locks)
+ }
+ __smp_locks_end = .;
+ .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) {
+ *(.smp_altinstr_replacement)
+ }
+ . = ALIGN(4096);
+ __smp_alt_end = .;
+
. = ALIGN(4096); /* Init code and data */
__init_begin = .;
.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
Index: vanilla-2.6.17-rc1/arch/x86_64/mm/init.c
===================================================================
--- vanilla-2.6.17-rc1.orig/arch/x86_64/mm/init.c 2006-04-11 14:04:05.000000000 +0200
+++ vanilla-2.6.17-rc1/arch/x86_64/mm/init.c 2006-04-12 09:49:30.000000000 +0200
@@ -613,20 +613,29 @@ void __init mem_init(void)
#endif
}
-void free_initmem(void)
+void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
unsigned long addr;
- addr = (unsigned long)(&__init_begin);
- for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
+ if (begin >= end)
+ return;
+
+ printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
+ for (addr = begin; addr < end; addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
init_page_count(virt_to_page(addr));
memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE);
free_page(addr);
totalram_pages++;
}
+}
+
+void free_initmem(void)
+{
memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin);
- printk ("Freeing unused kernel memory: %luk freed\n", (__init_end - __init_begin) >> 10);
+ free_init_pages("unused kernel memory",
+ (unsigned long)(&__init_begin),
+ (unsigned long)(&__init_end));
}
#ifdef CONFIG_DEBUG_RODATA
@@ -655,15 +664,7 @@ void mark_rodata_ro(void)
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
- if (start >= end)
- return;
- printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
- for (; start < end; start += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(start));
- init_page_count(virt_to_page(start));
- free_page(start);
- totalram_pages++;
- }
+ free_init_pages("initrd memory", start, end);
}
#endif
Index: vanilla-2.6.17-rc1/include/asm-x86_64/alternative.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ vanilla-2.6.17-rc1/include/asm-x86_64/alternative.h 2006-04-11 15:54:39.000000000 +0200
@@ -0,0 +1,146 @@
+#ifndef _X86_64_ALTERNATIVE_H
+#define _X86_64_ALTERNATIVE_H
+
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+
+struct alt_instr {
+ u8 *instr; /* original instruction */
+ u8 *replacement;
+ u8 cpuid; /* cpuid bit set for replacement */
+ u8 instrlen; /* length of original instruction */
+ u8 replacementlen; /* length of new instruction, <= instrlen */
+ u8 pad[5];
+};
+
+extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
+
+struct module;
+extern void alternatives_smp_module_add(struct module *mod, char *name,
+ void *locks, void *locks_end,
+ void *text, void *text_end);
+extern void alternatives_smp_module_del(struct module *mod);
+extern void alternatives_smp_switch(int smp);
+
+#endif
+
+/*
+ * Alternative instructions for different CPU types or capabilities.
+ *
+ * This allows to use optimized instructions even on generic binary
+ * kernels.
+ *
+ * length of oldinstr must be longer or equal the length of newinstr
+ * It can be padded with nops as needed.
+ *
+ * For non barrier like inlines please define new variants
+ * without volatile and memory clobber.
+ */
+#define alternative(oldinstr, newinstr, feature) \
+ asm volatile ("661:\n\t" oldinstr "\n662:\n" \
+ ".section .altinstructions,\"a\"\n" \
+ " .align 8\n" \
+ " .quad 661b\n" /* label */ \
+ " .quad 663f\n" /* new instruction */ \
+ " .byte %c0\n" /* feature bit */ \
+ " .byte 662b-661b\n" /* sourcelen */ \
+ " .byte 664f-663f\n" /* replacementlen */ \
+ ".previous\n" \
+ ".section .altinstr_replacement,\"ax\"\n" \
+ "663:\n\t" newinstr "\n664:\n" /* replacement */ \
+ ".previous" :: "i" (feature) : "memory")
+
+/*
+ * Alternative inline assembly with input.
+ *
+ * Pecularities:
+ * No memory clobber here.
+ * Argument numbers start with 1.
+ * Best is to use constraints that are fixed size (like (%1) ... "r")
+ * If you use variable sized constraints like "m" or "g" in the
+ * replacement make sure to pad to the worst case length.
+ */
+#define alternative_input(oldinstr, newinstr, feature, input...) \
+ asm volatile ("661:\n\t" oldinstr "\n662:\n" \
+ ".section .altinstructions,\"a\"\n" \
+ " .align 8\n" \
+ " .quad 661b\n" /* label */ \
+ " .quad 663f\n" /* new instruction */ \
+ " .byte %c0\n" /* feature bit */ \
+ " .byte 662b-661b\n" /* sourcelen */ \
+ " .byte 664f-663f\n" /* replacementlen */ \
+ ".previous\n" \
+ ".section .altinstr_replacement,\"ax\"\n" \
+ "663:\n\t" newinstr "\n664:\n" /* replacement */ \
+ ".previous" :: "i" (feature), ##input)
+
+/* Like alternative_input, but with a single output argument */
+#define alternative_io(oldinstr, newinstr, feature, output, input...) \
+ asm volatile ("661:\n\t" oldinstr "\n662:\n" \
+ ".section .altinstructions,\"a\"\n" \
+ " .align 8\n" \
+ " .quad 661b\n" /* label */ \
+ " .quad 663f\n" /* new instruction */ \
+ " .byte %c[feat]\n" /* feature bit */ \
+ " .byte 662b-661b\n" /* sourcelen */ \
+ " .byte 664f-663f\n" /* replacementlen */ \
+ ".previous\n" \
+ ".section .altinstr_replacement,\"ax\"\n" \
+ "663:\n\t" newinstr "\n664:\n" /* replacement */ \
+ ".previous" : output : [feat] "i" (feature), ##input)
+
+/*
+ * Alternative inline assembly for SMP.
+ *
+ * alternative_smp() takes two versions (SMP first, UP second) and is
+ * for more complex stuff such as spinlocks.
+ *
+ * The LOCK_PREFIX macro defined here replaces the LOCK and
+ * LOCK_PREFIX macros used everywhere in the source tree.
+ *
+ * SMP alternatives use the same data structures as the other
+ * alternatives and the X86_FEATURE_UP flag to indicate the case of a
+ * UP system running a SMP kernel. The existing apply_alternatives()
+ * works fine for patching a SMP kernel for UP.
+ *
+ * The SMP alternative tables can be kept after boot and contain both
+ * UP and SMP versions of the instructions to allow switching back to
+ * SMP at runtime, when hotplugging in a new CPU, which is especially
+ * useful in virtualized environments.
+ *
+ * The very common lock prefix is handled as special case in a
+ * separate table which is a pure address list without replacement ptr
+ * and size information. That keeps the table sizes small.
+ */
+
+#ifdef CONFIG_SMP
+#define alternative_smp(smpinstr, upinstr, args...) \
+ asm volatile ("661:\n\t" smpinstr "\n662:\n" \
+ ".section .smp_altinstructions,\"a\"\n" \
+ " .align 8\n" \
+ " .quad 661b\n" /* label */ \
+ " .quad 663f\n" /* new instruction */ \
+ " .byte 0x66\n" /* X86_FEATURE_UP */ \
+ " .byte 662b-661b\n" /* sourcelen */ \
+ " .byte 664f-663f\n" /* replacementlen */ \
+ ".previous\n" \
+ ".section .smp_altinstr_replacement,\"awx\"\n" \
+ "663:\n\t" upinstr "\n" /* replacement */ \
+ "664:\n\t.fill 662b-661b,1,0x42\n" /* space for original */ \
+ ".previous" : args)
+
+#define LOCK_PREFIX \
+ ".section .smp_locks,\"a\"\n" \
+ " .align 8\n" \
+ " .quad 661f\n" /* address */ \
+ ".previous\n" \
+ "661:\n\tlock; "
+
+#else /* ! CONFIG_SMP */
+#define alternative_smp(smpinstr, upinstr, args...) \
+ asm volatile (upinstr : args)
+#define LOCK_PREFIX ""
+#endif
+
+#endif /* _X86_64_ALTERNATIVE_H */
Index: vanilla-2.6.17-rc1/include/asm-x86_64/atomic.h
===================================================================
--- vanilla-2.6.17-rc1.orig/include/asm-x86_64/atomic.h 2006-04-11 14:04:17.000000000 +0200
+++ vanilla-2.6.17-rc1/include/asm-x86_64/atomic.h 2006-04-11 15:50:51.000000000 +0200
@@ -2,21 +2,13 @@
#define __ARCH_X86_64_ATOMIC__
#include <linux/config.h>
-#include <asm/types.h>
-
-/* atomic_t should be 32 bit signed type */
+#include <asm/alternative.h>
/*
* Atomic operations that C can't guarantee us. Useful for
* resource counting etc..
*/
-#ifdef CONFIG_SMP
-#define LOCK "lock ; "
-#else
-#define LOCK ""
-#endif
-
/*
* Make sure gcc doesn't try to be clever and move things around
* on us. We need to use _exactly_ the address the user gave us,
@@ -53,7 +45,7 @@ typedef struct { volatile int counter; }
static __inline__ void atomic_add(int i, atomic_t *v)
{
__asm__ __volatile__(
- LOCK "addl %1,%0"
+ LOCK_PREFIX "addl %1,%0"
:"=m" (v->counter)
:"ir" (i), "m" (v->counter));
}
@@ -68,7 +60,7 @@ static __inline__ void atomic_add(int i,
static __inline__ void atomic_sub(int i, atomic_t *v)
{
__asm__ __volatile__(
- LOCK "subl %1,%0"
+ LOCK_PREFIX "subl %1,%0"
:"=m" (v->counter)
:"ir" (i), "m" (v->counter));
}
@@ -87,7 +79,7 @@ static __inline__ int atomic_sub_and_tes
unsigned char c;
__asm__ __volatile__(
- LOCK "subl %2,%0; sete %1"
+ LOCK_PREFIX "subl %2,%0; sete %1"
:"=m" (v->counter), "=qm" (c)
:"ir" (i), "m" (v->counter) : "memory");
return c;
@@ -102,7 +94,7 @@ static __inline__ int atomic_sub_and_tes
static __inline__ void atomic_inc(atomic_t *v)
{
__asm__ __volatile__(
- LOCK "incl %0"
+ LOCK_PREFIX "incl %0"
:"=m" (v->counter)
:"m" (v->counter));
}
@@ -116,7 +108,7 @@ static __inline__ void atomic_inc(atomic
static __inline__ void atomic_dec(atomic_t *v)
{
__asm__ __volatile__(
- LOCK "decl %0"
+ LOCK_PREFIX "decl %0"
:"=m" (v->counter)
:"m" (v->counter));
}
@@ -134,7 +126,7 @@ static __inline__ int atomic_dec_and_tes
unsigned char c;
__asm__ __volatile__(
- LOCK "decl %0; sete %1"
+ LOCK_PREFIX "decl %0; sete %1"
:"=m" (v->counter), "=qm" (c)
:"m" (v->counter) : "memory");
return c != 0;
@@ -153,7 +145,7 @@ static __inline__ int atomic_inc_and_tes
unsigned char c;
__asm__ __volatile__(
- LOCK "incl %0; sete %1"
+ LOCK_PREFIX "incl %0; sete %1"
:"=m" (v->counter), "=qm" (c)
:"m" (v->counter) : "memory");
return c != 0;
@@ -173,7 +165,7 @@ static __inline__ int atomic_add_negativ
unsigned char c;
__asm__ __volatile__(
- LOCK "addl %2,%0; sets %1"
+ LOCK_PREFIX "addl %2,%0; sets %1"
:"=m" (v->counter), "=qm" (c)
:"ir" (i), "m" (v->counter) : "memory");
return c;
@@ -190,7 +182,7 @@ static __inline__ int atomic_add_return(
{
int __i = i;
__asm__ __volatile__(
- LOCK "xaddl %0, %1;"
+ LOCK_PREFIX "xaddl %0, %1;"
:"=r"(i)
:"m"(v->counter), "0"(i));
return i + __i;
@@ -238,7 +230,7 @@ typedef struct { volatile long counter;
static __inline__ void atomic64_add(long i, atomic64_t *v)
{
__asm__ __volatile__(
- LOCK "addq %1,%0"
+ LOCK_PREFIX "addq %1,%0"
:"=m" (v->counter)
:"ir" (i), "m" (v->counter));
}
@@ -253,7 +245,7 @@ static __inline__ void atomic64_add(long
static __inline__ void atomic64_sub(long i, atomic64_t *v)
{
__asm__ __volatile__(
- LOCK "subq %1,%0"
+ LOCK_PREFIX "subq %1,%0"
:"=m" (v->counter)
:"ir" (i), "m" (v->counter));
}
@@ -272,7 +264,7 @@ static __inline__ int atomic64_sub_and_t
unsigned char c;
__asm__ __volatile__(
- LOCK "subq %2,%0; sete %1"
+ LOCK_PREFIX "subq %2,%0; sete %1"
:"=m" (v->counter), "=qm" (c)
:"ir" (i), "m" (v->counter) : "memory");
return c;
@@ -287,7 +279,7 @@ static __inline__ int atomic64_sub_and_t
static __inline__ void atomic64_inc(atomic64_t *v)
{
__asm__ __volatile__(
- LOCK "incq %0"
+ LOCK_PREFIX "incq %0"
:"=m" (v->counter)
:"m" (v->counter));
}
@@ -301,7 +293,7 @@ static __inline__ void atomic64_inc(atom
static __inline__ void atomic64_dec(atomic64_t *v)
{
__asm__ __volatile__(
- LOCK "decq %0"
+ LOCK_PREFIX "decq %0"
:"=m" (v->counter)
:"m" (v->counter));
}
@@ -319,7 +311,7 @@ static __inline__ int atomic64_dec_and_t
unsigned char c;
__asm__ __volatile__(
- LOCK "decq %0; sete %1"
+ LOCK_PREFIX "decq %0; sete %1"
:"=m" (v->counter), "=qm" (c)
:"m" (v->counter) : "memory");
return c != 0;
@@ -338,7 +330,7 @@ static __inline__ int atomic64_inc_and_t
unsigned char c;
__asm__ __volatile__(
- LOCK "incq %0; sete %1"
+ LOCK_PREFIX "incq %0; sete %1"
:"=m" (v->counter), "=qm" (c)
:"m" (v->counter) : "memory");
return c != 0;
@@ -358,7 +350,7 @@ static __inline__ int atomic64_add_negat
unsigned char c;
__asm__ __volatile__(
- LOCK "addq %2,%0; sets %1"
+ LOCK_PREFIX "addq %2,%0; sets %1"
:"=m" (v->counter), "=qm" (c)
:"ir" (i), "m" (v->counter) : "memory");
return c;
@@ -375,7 +367,7 @@ static __inline__ long atomic64_add_retu
{
long __i = i;
__asm__ __volatile__(
- LOCK "xaddq %0, %1;"
+ LOCK_PREFIX "xaddq %0, %1;"
:"=r"(i)
:"m"(v->counter), "0"(i));
return i + __i;
@@ -419,11 +411,11 @@ static __inline__ long atomic64_sub_retu
/* These are x86-specific, used by some header files */
#define atomic_clear_mask(mask, addr) \
-__asm__ __volatile__(LOCK "andl %0,%1" \
+__asm__ __volatile__(LOCK_PREFIX "andl %0,%1" \
: : "r" (~(mask)),"m" (*addr) : "memory")
#define atomic_set_mask(mask, addr) \
-__asm__ __volatile__(LOCK "orl %0,%1" \
+__asm__ __volatile__(LOCK_PREFIX "orl %0,%1" \
: : "r" ((unsigned)mask),"m" (*(addr)) : "memory")
/* Atomic operations are already serializing on x86 */
Index: vanilla-2.6.17-rc1/include/asm-x86_64/bitops.h
===================================================================
--- vanilla-2.6.17-rc1.orig/include/asm-x86_64/bitops.h 2006-04-11 14:04:17.000000000 +0200
+++ vanilla-2.6.17-rc1/include/asm-x86_64/bitops.h 2006-04-11 14:47:47.000000000 +0200
@@ -6,12 +6,7 @@
*/
#include <linux/config.h>
-
-#ifdef CONFIG_SMP
-#define LOCK_PREFIX "lock ; "
-#else
-#define LOCK_PREFIX ""
-#endif
+#include <asm/alternative.h>
#define ADDR (*(volatile long *) addr)
Index: vanilla-2.6.17-rc1/include/asm-x86_64/cpufeature.h
===================================================================
--- vanilla-2.6.17-rc1.orig/include/asm-x86_64/cpufeature.h 2006-03-20 06:53:29.000000000 +0100
+++ vanilla-2.6.17-rc1/include/asm-x86_64/cpufeature.h 2006-04-11 15:15:18.000000000 +0200
@@ -64,6 +64,7 @@
#define X86_FEATURE_REP_GOOD (3*32+ 4) /* rep microcode works well on this CPU */
#define X86_FEATURE_CONSTANT_TSC (3*32+5) /* TSC runs at constant rate */
#define X86_FEATURE_SYNC_RDTSC (3*32+6) /* RDTSC syncs CPU core */
+#define X86_FEATURE_UP (3*32+ 7) /* SMP kernel running on UP */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */
Index: vanilla-2.6.17-rc1/include/asm-x86_64/rwlock.h
===================================================================
--- vanilla-2.6.17-rc1.orig/include/asm-x86_64/rwlock.h 2006-04-11 15:55:50.000000000 +0200
+++ vanilla-2.6.17-rc1/include/asm-x86_64/rwlock.h 2006-04-11 15:57:07.000000000 +0200
@@ -24,7 +24,7 @@
#define RW_LOCK_BIAS_STR "0x01000000"
#define __build_read_lock_ptr(rw, helper) \
- asm volatile(LOCK "subl $1,(%0)\n\t" \
+ asm volatile(LOCK_PREFIX "subl $1,(%0)\n\t" \
"js 2f\n" \
"1:\n" \
LOCK_SECTION_START("") \
@@ -34,7 +34,7 @@
::"a" (rw) : "memory")
#define __build_read_lock_const(rw, helper) \
- asm volatile(LOCK "subl $1,%0\n\t" \
+ asm volatile(LOCK_PREFIX "subl $1,%0\n\t" \
"js 2f\n" \
"1:\n" \
LOCK_SECTION_START("") \
@@ -54,7 +54,7 @@
} while (0)
#define __build_write_lock_ptr(rw, helper) \
- asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
+ asm volatile(LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
"jnz 2f\n" \
"1:\n" \
LOCK_SECTION_START("") \
@@ -64,7 +64,7 @@
::"a" (rw) : "memory")
#define __build_write_lock_const(rw, helper) \
- asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",%0\n\t" \
+ asm volatile(LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",%0\n\t" \
"jnz 2f\n" \
"1:\n" \
LOCK_SECTION_START("") \
Index: vanilla-2.6.17-rc1/include/asm-x86_64/spinlock.h
===================================================================
--- vanilla-2.6.17-rc1.orig/include/asm-x86_64/spinlock.h 2006-03-20 06:53:29.000000000 +0100
+++ vanilla-2.6.17-rc1/include/asm-x86_64/spinlock.h 2006-04-11 14:47:47.000000000 +0200
@@ -32,15 +32,19 @@
"jmp 1b\n" \
LOCK_SECTION_END
+#define __raw_spin_lock_string_up \
+ "\n\tdecl %0"
+
#define __raw_spin_unlock_string \
"movl $1,%0" \
:"=m" (lock->slock) : : "memory"
static inline void __raw_spin_lock(raw_spinlock_t *lock)
{
- __asm__ __volatile__(
- __raw_spin_lock_string
- :"=m" (lock->slock) : : "memory");
+ alternative_smp(
+ __raw_spin_lock_string,
+ __raw_spin_lock_string_up,
+ "=m" (lock->slock) : : "memory");
}
#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
Index: vanilla-2.6.17-rc1/include/asm-x86_64/system.h
===================================================================
--- vanilla-2.6.17-rc1.orig/include/asm-x86_64/system.h 2006-04-11 14:04:17.000000000 +0200
+++ vanilla-2.6.17-rc1/include/asm-x86_64/system.h 2006-04-11 16:09:38.000000000 +0200
@@ -4,15 +4,10 @@
#include <linux/config.h>
#include <linux/kernel.h>
#include <asm/segment.h>
+#include <asm/alternative.h>
#ifdef __KERNEL__
-#ifdef CONFIG_SMP
-#define LOCK_PREFIX "lock ; "
-#else
-#define LOCK_PREFIX ""
-#endif
-
#define __STR(x) #x
#define STR(x) __STR(x)
@@ -35,7 +30,7 @@
"thread_return:\n\t" \
"movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \
"movq %P[thread_info](%%rsi),%%r8\n\t" \
- LOCK "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
+ LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
"movq %%rax,%%rdi\n\t" \
"jc ret_from_fork\n\t" \
RESTORE_CONTEXT \
@@ -70,82 +65,6 @@ extern void load_gs_index(unsigned);
".previous" \
: :"r" (value), "r" (0))
-#ifdef __KERNEL__
-struct alt_instr {
- __u8 *instr; /* original instruction */
- __u8 *replacement;
- __u8 cpuid; /* cpuid bit set for replacement */
- __u8 instrlen; /* length of original instruction */
- __u8 replacementlen; /* length of new instruction, <= instrlen */
- __u8 pad[5];
-};
-#endif
-
-/*
- * Alternative instructions for different CPU types or capabilities.
- *
- * This allows to use optimized instructions even on generic binary
- * kernels.
- *
- * length of oldinstr must be longer or equal the length of newinstr
- * It can be padded with nops as needed.
- *
- * For non barrier like inlines please define new variants
- * without volatile and memory clobber.
- */
-#define alternative(oldinstr, newinstr, feature) \
- asm volatile ("661:\n\t" oldinstr "\n662:\n" \
- ".section .altinstructions,\"a\"\n" \
- " .align 8\n" \
- " .quad 661b\n" /* label */ \
- " .quad 663f\n" /* new instruction */ \
- " .byte %c0\n" /* feature bit */ \
- " .byte 662b-661b\n" /* sourcelen */ \
- " .byte 664f-663f\n" /* replacementlen */ \
- ".previous\n" \
- ".section .altinstr_replacement,\"ax\"\n" \
- "663:\n\t" newinstr "\n664:\n" /* replacement */ \
- ".previous" :: "i" (feature) : "memory")
-
-/*
- * Alternative inline assembly with input.
- *
- * Peculiarities:
- * No memory clobber here.
- * Argument numbers start with 1.
- * Best is to use constraints that are fixed size (like (%1) ... "r")
- * If you use variable sized constraints like "m" or "g" in the
- * replacement make sure to pad to the worst case length.
- */
-#define alternative_input(oldinstr, newinstr, feature, input...) \
- asm volatile ("661:\n\t" oldinstr "\n662:\n" \
- ".section .altinstructions,\"a\"\n" \
- " .align 8\n" \
- " .quad 661b\n" /* label */ \
- " .quad 663f\n" /* new instruction */ \
- " .byte %c0\n" /* feature bit */ \
- " .byte 662b-661b\n" /* sourcelen */ \
- " .byte 664f-663f\n" /* replacementlen */ \
- ".previous\n" \
- ".section .altinstr_replacement,\"ax\"\n" \
- "663:\n\t" newinstr "\n664:\n" /* replacement */ \
- ".previous" :: "i" (feature), ##input)
-
-/* Like alternative_input, but with a single output argument */
-#define alternative_io(oldinstr, newinstr, feature, output, input...) \
- asm volatile ("661:\n\t" oldinstr "\n662:\n" \
- ".section .altinstructions,\"a\"\n" \
- " .align 8\n" \
- " .quad 661b\n" /* label */ \
- " .quad 663f\n" /* new instruction */ \
- " .byte %c[feat]\n" /* feature bit */ \
- " .byte 662b-661b\n" /* sourcelen */ \
- " .byte 664f-663f\n" /* replacementlen */ \
- ".previous\n" \
- ".section .altinstr_replacement,\"ax\"\n" \
- "663:\n\t" newinstr "\n664:\n" /* replacement */ \
- ".previous" : output : [feat] "i" (feature), ##input)
-
/*
* Clear and set 'TS' bit respectively
*/
@@ -367,5 +286,6 @@ static inline unsigned long __cmpxchg(vo
void cpu_idle_wait(void);
extern unsigned long arch_align_stack(unsigned long sp);
+extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
#endif
Index: vanilla-2.6.17-rc1/arch/i386/kernel/alternative.c
===================================================================
--- vanilla-2.6.17-rc1.orig/arch/i386/kernel/alternative.c 2006-04-11 14:04:02.000000000 +0200
+++ vanilla-2.6.17-rc1/arch/i386/kernel/alternative.c 2006-04-12 09:28:56.000000000 +0200
@@ -4,27 +4,41 @@
#include <asm/alternative.h>
#include <asm/sections.h>
-#define DEBUG 0
-#if DEBUG
-# define DPRINTK(fmt, args...) printk(fmt, args)
-#else
-# define DPRINTK(fmt, args...)
-#endif
+static int no_replacement = 0;
+static int smp_alt_once = 0;
+static int debug_alternative = 0;
+
+static int __init noreplacement_setup(char *s)
+{
+ no_replacement = 1;
+ return 1;
+}
+static int __init bootonly(char *str)
+{
+ smp_alt_once = 1;
+ return 1;
+}
+static int __init debug_alt(char *str)
+{
+ debug_alternative = 1;
+ return 1;
+}
+__setup("noreplacement", noreplacement_setup);
+__setup("smp-alt-boot", bootonly);
+__setup("debug-alternative", debug_alt);
+
+#define DPRINTK(fmt, args...) if (debug_alternative) \
+ printk(KERN_DEBUG fmt, args)
+
+#ifdef GENERIC_NOP1
/* Use inline assembly to define this because the nops are defined
as inline assembly strings in the include files and we cannot
get them easily into strings. */
asm("\t.data\nintelnops: "
GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
GENERIC_NOP7 GENERIC_NOP8);
-asm("\t.data\nk8nops: "
- K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
- K8_NOP7 K8_NOP8);
-asm("\t.data\nk7nops: "
- K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
- K7_NOP7 K7_NOP8);
-
-extern unsigned char intelnops[], k8nops[], k7nops[];
+extern unsigned char intelnops[];
static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
NULL,
intelnops,
@@ -36,6 +50,13 @@ static unsigned char *intel_nops[ASM_NOP
intelnops + 1 + 2 + 3 + 4 + 5 + 6,
intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
+#endif
+
+#ifdef K8_NOP1
+asm("\t.data\nk8nops: "
+ K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
+ K8_NOP7 K8_NOP8);
+extern unsigned char k8nops[];
static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
NULL,
k8nops,
@@ -47,6 +68,13 @@ static unsigned char *k8_nops[ASM_NOP_MA
k8nops + 1 + 2 + 3 + 4 + 5 + 6,
k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
+#endif
+
+#ifdef K7_NOP1
+asm("\t.data\nk7nops: "
+ K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
+ K7_NOP7 K7_NOP8);
+extern unsigned char k7nops[];
static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
NULL,
k7nops,
@@ -58,6 +86,18 @@ static unsigned char *k7_nops[ASM_NOP_MA
k7nops + 1 + 2 + 3 + 4 + 5 + 6,
k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
+#endif
+
+#ifdef CONFIG_X86_64
+
+extern char __vsyscall_0;
+static inline unsigned char** find_nop_table(void)
+{
+ return k8_nops;
+}
+
+#else /* CONFIG_X86_64 */
+
static struct nop {
int cpuid;
unsigned char **noptable;
@@ -67,14 +107,6 @@ static struct nop {
{ -1, NULL }
};
-
-extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
-extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
-extern u8 *__smp_locks[], *__smp_locks_end[];
-
-extern u8 __smp_alt_begin[], __smp_alt_end[];
-
-
static unsigned char** find_nop_table(void)
{
unsigned char **noptable = intel_nops;
@@ -89,6 +121,14 @@ static unsigned char** find_nop_table(vo
return noptable;
}
+#endif /* CONFIG_X86_64 */
+
+extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
+extern u8 *__smp_locks[], *__smp_locks_end[];
+
+extern u8 __smp_alt_begin[], __smp_alt_end[];
+
/* Replace instructions with better alternatives for this CPU type.
This runs before SMP is initialized to avoid SMP problems with
self modifying code. This implies that assymetric systems where
@@ -99,6 +139,7 @@ void apply_alternatives(struct alt_instr
{
unsigned char **noptable = find_nop_table();
struct alt_instr *a;
+ u8 *instr;
int diff, i, k;
DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
@@ -106,7 +147,16 @@ void apply_alternatives(struct alt_instr
BUG_ON(a->replacementlen > a->instrlen);
if (!boot_cpu_has(a->cpuid))
continue;
- memcpy(a->instr, a->replacement, a->replacementlen);
+ instr = a->instr;
+#ifdef CONFIG_X86_64
+ /* vsyscall code is not mapped yet. resolve it manually. */
+ if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
+ instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
+ DPRINTK("%s: vsyscall fixup: %p => %p\n",
+ __FUNCTION__, a->instr, instr);
+ }
+#endif
+ memcpy(instr, a->replacement, a->replacementlen);
diff = a->instrlen - a->replacementlen;
/* Pad the rest with nops */
for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
@@ -186,14 +236,6 @@ struct smp_alt_module {
static LIST_HEAD(smp_alt_modules);
static DEFINE_SPINLOCK(smp_alt);
-static int smp_alt_once = 0;
-static int __init bootonly(char *str)
-{
- smp_alt_once = 1;
- return 1;
-}
-__setup("smp-alt-boot", bootonly);
-
void alternatives_smp_module_add(struct module *mod, char *name,
void *locks, void *locks_end,
void *text, void *text_end)
@@ -201,6 +243,9 @@ void alternatives_smp_module_add(struct
struct smp_alt_module *smp;
unsigned long flags;
+ if (no_replacement)
+ return;
+
if (smp_alt_once) {
if (boot_cpu_has(X86_FEATURE_UP))
alternatives_smp_unlock(locks, locks_end,
@@ -235,7 +280,7 @@ void alternatives_smp_module_del(struct
struct smp_alt_module *item;
unsigned long flags;
- if (smp_alt_once)
+ if (no_replacement || smp_alt_once)
return;
spin_lock_irqsave(&smp_alt, flags);
@@ -256,7 +301,7 @@ void alternatives_smp_switch(int smp)
struct smp_alt_module *mod;
unsigned long flags;
- if (smp_alt_once)
+ if (no_replacement || smp_alt_once)
return;
BUG_ON(!smp && (num_online_cpus() > 1));
@@ -285,6 +330,13 @@ void alternatives_smp_switch(int smp)
void __init alternative_instructions(void)
{
+ if (no_replacement) {
+ printk(KERN_INFO "(SMP-)alternatives turned off\n");
+ free_init_pages("SMP alternatives",
+ (unsigned long)__smp_alt_begin,
+ (unsigned long)__smp_alt_end);
+ return;
+ }
apply_alternatives(__alt_instructions, __alt_instructions_end);
/* switch to patch-once-at-boottime-only mode and free the
Index: vanilla-2.6.17-rc1/include/asm-i386/alternative.h
===================================================================
--- vanilla-2.6.17-rc1.orig/include/asm-i386/alternative.h 2006-04-11 14:04:16.000000000 +0200
+++ vanilla-2.6.17-rc1/include/asm-i386/alternative.h 2006-04-11 14:59:45.000000000 +0200
@@ -3,6 +3,8 @@
#ifdef __KERNEL__
+#include <linux/types.h>
+
struct alt_instr {
u8 *instr; /* original instruction */
u8 *replacement;
Index: vanilla-2.6.17-rc1/include/asm-x86_64/mutex.h
===================================================================
--- vanilla-2.6.17-rc1.orig/include/asm-x86_64/mutex.h 2006-03-20 06:53:29.000000000 +0100
+++ vanilla-2.6.17-rc1/include/asm-x86_64/mutex.h 2006-04-11 16:10:57.000000000 +0200
@@ -24,7 +24,7 @@ do { \
typecheck_fn(fastcall void (*)(atomic_t *), fail_fn); \
\
__asm__ __volatile__( \
- LOCK " decl (%%rdi) \n" \
+ LOCK_PREFIX " decl (%%rdi) \n" \
" js 2f \n" \
"1: \n" \
\
@@ -74,7 +74,7 @@ do { \
typecheck_fn(fastcall void (*)(atomic_t *), fail_fn); \
\
__asm__ __volatile__( \
- LOCK " incl (%%rdi) \n" \
+ LOCK_PREFIX " incl (%%rdi) \n" \
" jle 2f \n" \
"1: \n" \
\
Index: vanilla-2.6.17-rc1/include/asm-x86_64/semaphore.h
===================================================================
--- vanilla-2.6.17-rc1.orig/include/asm-x86_64/semaphore.h 2006-03-20 06:53:29.000000000 +0100
+++ vanilla-2.6.17-rc1/include/asm-x86_64/semaphore.h 2006-04-11 15:58:27.000000000 +0200
@@ -106,7 +106,7 @@ static inline void down(struct semaphore
__asm__ __volatile__(
"# atomic down operation\n\t"
- LOCK "decl %0\n\t" /* --sem->count */
+ LOCK_PREFIX "decl %0\n\t" /* --sem->count */
"js 2f\n"
"1:\n"
LOCK_SECTION_START("")
@@ -130,7 +130,7 @@ static inline int down_interruptible(str
__asm__ __volatile__(
"# atomic interruptible down operation\n\t"
- LOCK "decl %1\n\t" /* --sem->count */
+ LOCK_PREFIX "decl %1\n\t" /* --sem->count */
"js 2f\n\t"
"xorl %0,%0\n"
"1:\n"
@@ -154,7 +154,7 @@ static inline int down_trylock(struct se
__asm__ __volatile__(
"# atomic interruptible down operation\n\t"
- LOCK "decl %1\n\t" /* --sem->count */
+ LOCK_PREFIX "decl %1\n\t" /* --sem->count */
"js 2f\n\t"
"xorl %0,%0\n"
"1:\n"
@@ -178,7 +178,7 @@ static inline void up(struct semaphore *
{
__asm__ __volatile__(
"# atomic up operation\n\t"
- LOCK "incl %0\n\t" /* ++sem->count */
+ LOCK_PREFIX "incl %0\n\t" /* ++sem->count */
"jle 2f\n"
"1:\n"
LOCK_SECTION_START("")
[Index of Archives]
[Kernel Newbies]
[Netfilter]
[Bugtraq]
[Photo]
[Stuff]
[Gimp]
[Yosemite News]
[MIPS Linux]
[ARM Linux]
[Linux Security]
[Linux RAID]
[Video 4 Linux]
[Linux for the blind]
[Linux Resources]