> +static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
> +{
> +#define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]
> +	C(0); C(1); C(2);
> +#undef C
> +}

Why not use a for loop here? gcc should be able to optimize it...

> +#define load_TR_desc()

Please make this expand to do {} while (0)...

> +#define load_gdt(dtr) do {						\
> +} while (0)

So you know the trick :-)

> +#define load_idt(dtr) HYPERVISOR_set_trap_table(xen_trap_table)
> +#define load_tr(tr) __asm__ __volatile("ltr %0"::"mr" (tr))
> +#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"mr" (ldt))

__volatile (not __volatile__?). Could you just use 'asm volatile'
without the __s instead?

