[PATCH 02/08] percpu -v2 module insertion

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Simplify module.c's handling of the per_cpu variables of modules.
Instead of copying the variables into a preallocated space in the
kernel, create the module's own area for per_cpu variables and update
each entry in the module's per_cpu_offset__##var section to point to
the module's per-CPU offset table.

Signed-off-by: Steven Rostedt <[email protected]>

Index: linux-2.6.17-rc1/kernel/module.c
===================================================================
--- linux-2.6.17-rc1.orig/kernel/module.c	2006-04-13 22:37:57.000000000 -0400
+++ linux-2.6.17-rc1/kernel/module.c	2006-04-14 15:40:20.000000000 -0400
@@ -2,6 +2,10 @@
    Copyright (C) 2002 Richard Henderson
    Copyright (C) 2001 Rusty Russell, 2002 Rusty Russell IBM.
 
+   Modification of per_cpu variables such that modules contain
+   their own sections of these variables.
+     -  Copyright(C) 2006 Steven Rostedt, Kihon Technologies Inc.
+
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
     the Free Software Foundation; either version 2 of the License, or
@@ -237,36 +241,6 @@ static struct module *find_module(const 
 }
 
 #ifdef CONFIG_SMP
-/* Number of blocks used and allocated. */
-static unsigned int pcpu_num_used, pcpu_num_allocated;
-/* Size of each block.  -ve means used. */
-static int *pcpu_size;
-
-static int split_block(unsigned int i, unsigned short size)
-{
-	/* Reallocation required? */
-	if (pcpu_num_used + 1 > pcpu_num_allocated) {
-		int *new = kmalloc(sizeof(new[0]) * pcpu_num_allocated*2,
-				   GFP_KERNEL);
-		if (!new)
-			return 0;
-
-		memcpy(new, pcpu_size, sizeof(new[0])*pcpu_num_allocated);
-		pcpu_num_allocated *= 2;
-		kfree(pcpu_size);
-		pcpu_size = new;
-	}
-
-	/* Insert a new subblock */
-	memmove(&pcpu_size[i+1], &pcpu_size[i],
-		sizeof(pcpu_size[0]) * (pcpu_num_used - i));
-	pcpu_num_used++;
-
-	pcpu_size[i+1] -= size;
-	pcpu_size[i] = size;
-	return 1;
-}
-
 static inline unsigned int block_size(int val)
 {
 	if (val < 0)
@@ -280,8 +254,6 @@ extern char __per_cpu_start[], __per_cpu
 static void *percpu_modalloc(unsigned long size, unsigned long align,
 			     const char *name)
 {
-	unsigned long extra;
-	unsigned int i;
 	void *ptr;
 
 	if (align > SMP_CACHE_BYTES) {
@@ -289,68 +261,65 @@ static void *percpu_modalloc(unsigned lo
 		       name, align, SMP_CACHE_BYTES);
 		align = SMP_CACHE_BYTES;
 	}
+	size = ALIGN(size, SMP_CACHE_BYTES);
+
+	ptr = kmalloc(size * NR_CPUS, GFP_KERNEL);
+	if (!ptr)
+		printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n",
+		       size);
+	return ptr;
+}
 
-	ptr = __per_cpu_start;
-	for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
-		/* Extra for alignment requirement. */
-		extra = ALIGN((unsigned long)ptr, align) - (unsigned long)ptr;
-		BUG_ON(i == 0 && extra != 0);
+static void percpu_modfree(void *freeme, void *freemetoo)
+{
+	kfree(freeme);
+	kfree(freemetoo);
+}
 
-		if (pcpu_size[i] < 0 || pcpu_size[i] < extra + size)
-			continue;
+static void *percpu_offset_setup(void *dest,
+				 unsigned long var_size,
+				 unsigned long sect_size)
+{
+	unsigned long *ptr = dest;
+	unsigned long *per_cpu_offset;
+	int i;
 
-		/* Transfer extra to previous block. */
-		if (pcpu_size[i-1] < 0)
-			pcpu_size[i-1] -= extra;
-		else
-			pcpu_size[i-1] += extra;
-		pcpu_size[i] -= extra;
-		ptr += extra;
-
-		/* Split block if warranted */
-		if (pcpu_size[i] - size > sizeof(unsigned long))
-			if (!split_block(i, size))
-				return NULL;
-
-		/* Mark allocated */
-		pcpu_size[i] = -pcpu_size[i];
-		return ptr;
-	}
+	/* make sure everything is smp cache aligned */
+	var_size = ALIGN(var_size, SMP_CACHE_BYTES);
 
-	printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n",
-	       size);
-	return NULL;
+	/* allocate the modules own per_cpu_offset pointer */
+	per_cpu_offset = kmalloc(sizeof(unsigned long) * NR_CPUS, GFP_KERNEL);
+	if (!per_cpu_offset)
+		return NULL;
+
+	/* Set the pointer to the index of each CPU area */
+	for (i = 0; i < NR_CPUS; i++)
+		per_cpu_offset[i] = (i * var_size);
+
+	/*
+	 * Now copy this offset to all of the percpu_offset pointers in the
+	 * percpu_offset section. This is how the per_cpu variables can find
+	 * the offset into this section from the per_cpu macros.
+	 */
+	for (i=0; i < sect_size; i++, ptr++)
+		*ptr = (unsigned long)per_cpu_offset;
+
+	return per_cpu_offset;
 }
 
-static void percpu_modfree(void *freeme)
+static void percpu_modcopy(void *pcpudst, const void *src,
+			   unsigned long size,
+			   unsigned long *per_cpu_offset)
 {
 	unsigned int i;
-	void *ptr = __per_cpu_start + block_size(pcpu_size[0]);
 
-	/* First entry is core kernel percpu data. */
-	for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
-		if (ptr == freeme) {
-			pcpu_size[i] = -pcpu_size[i];
-			goto free;
-		}
-	}
-	BUG();
+	if (!pcpudst)
+		return;
 
- free:
-	/* Merge with previous? */
-	if (pcpu_size[i-1] >= 0) {
-		pcpu_size[i-1] += pcpu_size[i];
-		pcpu_num_used--;
-		memmove(&pcpu_size[i], &pcpu_size[i+1],
-			(pcpu_num_used - i) * sizeof(pcpu_size[0]));
-		i--;
-	}
-	/* Merge with next? */
-	if (i+1 < pcpu_num_used && pcpu_size[i+1] >= 0) {
-		pcpu_size[i] += pcpu_size[i+1];
-		pcpu_num_used--;
-		memmove(&pcpu_size[i+1], &pcpu_size[i+2],
-			(pcpu_num_used - (i+1)) * sizeof(pcpu_size[0]));
+	for (i=0; i < NR_CPUS; i++) {
+		if (cpu_possible(i))
+			memcpy(pcpudst + per_cpu_offset[i],
+			       src, size);
 	}
 }
 
@@ -361,24 +330,13 @@ static unsigned int find_pcpusec(Elf_Ehd
 	return find_sec(hdr, sechdrs, secstrings, ".data.percpu");
 }
 
-static int percpu_modinit(void)
+static unsigned int find_pcpuoffsetsec(Elf_Ehdr *hdr,
+				       Elf_Shdr *sechdrs,
+				       const char *secstrings)
 {
-	pcpu_num_used = 2;
-	pcpu_num_allocated = 2;
-	pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated,
-			    GFP_KERNEL);
-	/* Static in-kernel percpu data (used). */
-	pcpu_size[0] = -ALIGN(__per_cpu_end-__per_cpu_start, SMP_CACHE_BYTES);
-	/* Free room. */
-	pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0];
-	if (pcpu_size[1] < 0) {
-		printk(KERN_ERR "No per-cpu room for modules.\n");
-		pcpu_num_used = 1;
-	}
+	return find_sec(hdr, sechdrs, secstrings, ".data.percpu_offset");
+}
 
-	return 0;
-}	
-__initcall(percpu_modinit);
 #else /* ... !CONFIG_SMP */
 static inline void *percpu_modalloc(unsigned long size, unsigned long align,
 				    const char *name)
@@ -389,14 +347,29 @@ static inline void percpu_modfree(void *
 {
 	BUG();
 }
+
+static inline void *percpu_offset_setup(void *dest,
+					unsigned long var_size,
+					unsigned long sect_size)
+{
+	return NULL;
+}
+
 static inline unsigned int find_pcpusec(Elf_Ehdr *hdr,
 					Elf_Shdr *sechdrs,
 					const char *secstrings)
 {
 	return 0;
 }
+static inline unsigned int find_pcpuoffsetsec(Elf_Ehdr *hdr,
+					      Elf_Shdr *sechdrs,
+					      const char *secstrings)
+{
+	return 0;
+}
 static inline void percpu_modcopy(void *pcpudst, const void *src,
-				  unsigned long size)
+				  unsigned long size,
+				  unsigned long *per_cpu_offset)
 {
 	/* pcpusec should be 0, and size of that section should be 0. */
 	BUG_ON(size != 0);
@@ -1061,7 +1034,7 @@ static void free_module(struct module *m
 	module_free(mod, mod->module_init);
 	kfree(mod->args);
 	if (mod->percpu)
-		percpu_modfree(mod->percpu);
+		percpu_modfree(mod->percpu, mod->percpu_offset);
 
 	/* Finally, free the core (containing the module structure) */
 	module_free(mod, mod->module_core);
@@ -1411,13 +1384,14 @@ static struct module *load_module(void _
 	char *secstrings, *args, *modmagic, *strtab = NULL;
 	unsigned int i, symindex = 0, strindex = 0, setupindex, exindex,
 		exportindex, modindex, obsparmindex, infoindex, gplindex,
-		crcindex, gplcrcindex, versindex, pcpuindex, gplfutureindex,
-		gplfuturecrcindex;
+		crcindex, gplcrcindex, versindex, pcpuindex, pcpuoffsetindex,
+		gplfutureindex,	gplfuturecrcindex;
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
 	struct exception_table_entry *extable;
 	mm_segment_t old_fs;
+	unsigned long *percpu_offset = NULL;
 
 	DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
 	       umod, len, uargs);
@@ -1502,6 +1476,7 @@ static struct module *load_module(void _
 	versindex = find_sec(hdr, sechdrs, secstrings, "__versions");
 	infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo");
 	pcpuindex = find_pcpusec(hdr, sechdrs, secstrings);
+	pcpuoffsetindex = find_pcpuoffsetsec(hdr, sechdrs, secstrings);
 
 	/* Don't keep modinfo section */
 	sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
@@ -1550,6 +1525,7 @@ static struct module *load_module(void _
 		goto free_mod;
 
 	if (pcpuindex) {
+		BUG_ON(!pcpuoffsetindex);
 		/* We have a special allocation for this section. */
 		percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size,
 					 sechdrs[pcpuindex].sh_addralign,
@@ -1598,9 +1574,20 @@ static struct module *load_module(void _
 		else
 			dest = mod->module_core + sechdrs[i].sh_entsize;
 
-		if (sechdrs[i].sh_type != SHT_NOBITS)
-			memcpy(dest, (void *)sechdrs[i].sh_addr,
-			       sechdrs[i].sh_size);
+		if (sechdrs[i].sh_type != SHT_NOBITS) {
+			if (i && i == pcpuoffsetindex) {
+				unsigned long *ret;
+				BUG_ON(!pcpuindex);
+				ret = percpu_offset_setup(dest,
+							  sechdrs[pcpuindex].sh_size,
+							  sechdrs[i].sh_size);
+				if (!ret)
+					goto cleanup; /* is this right? */
+				percpu_offset = ret;
+			} else
+				memcpy(dest, (void *)sechdrs[i].sh_addr,
+				       sechdrs[i].sh_size);
+		}
 		/* Update sh_addr to point to copy in image. */
 		sechdrs[i].sh_addr = (unsigned long)dest;
 		DEBUGP("\t0x%lx %s\n", sechdrs[i].sh_addr, secstrings + sechdrs[i].sh_name);
@@ -1608,6 +1595,9 @@ static struct module *load_module(void _
 	/* Module has been moved. */
 	mod = (void *)sechdrs[modindex].sh_addr;
 
+	/* Update the percpu_offset here since we might miss it when copying */
+	mod->percpu_offset = percpu_offset;
+
 	/* Now we've moved module, initialize linked lists, etc. */
 	module_unload_init(mod);
 
@@ -1688,7 +1678,7 @@ static struct module *load_module(void _
 
 	/* Finally, copy percpu area over. */
 	percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr,
-		       sechdrs[pcpuindex].sh_size);
+		       sechdrs[pcpuindex].sh_size, mod->percpu_offset);
 
 	add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
 
@@ -1753,7 +1743,7 @@ static struct module *load_module(void _
 	module_free(mod, mod->module_core);
  free_percpu:
 	if (percpu)
-		percpu_modfree(percpu);
+		percpu_modfree(percpu, percpu_offset);
  free_mod:
 	kfree(args);
  free_hdr:
Index: linux-2.6.17-rc1/include/linux/module.h
===================================================================
--- linux-2.6.17-rc1.orig/include/linux/module.h	2006-04-13 22:37:57.000000000 -0400
+++ linux-2.6.17-rc1/include/linux/module.h	2006-04-13 22:38:07.000000000 -0400
@@ -319,6 +319,7 @@ struct module
 
 	/* Per-cpu data. */
 	void *percpu;
+	unsigned long *percpu_offset;
 
 	/* The command line arguments (may be mangled).  People like
 	   keeping pointers to this stuff */


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux