Re: [PATCH] stop x86 ->sysdata abuse; introduce pci_sysdata

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Jun 05, 2007 at 01:29:05PM +0300, Muli Ben-Yehuda wrote:
> On Mon, Jun 04, 2007 at 05:05:51PM -0400, Jeff Garzik wrote:
> > 
> > This patch introduces struct pci_sysdata to x86 and x86-64, and
> > converts the existing two users (NUMA, Calgary) to use it.
> > 
> > This eliminates the conflict between NUMA and Calgary using the same
> > pointer for different uses, and lays the groundwork for adding x86
> > PCI domain support.
> 
> Thanks for the patch. I am testing with Calgary and will push upstream
> through the next batch of Calgary updates when it will be ready. At
> the moment it doesn't boot on one of my test machines, I'm looking
> into it.

OK, the patch is fixed and works for me with Calgary. Andi, it looks
like you added the acpi.c NUMA bits originally; perhaps you could test
and/or ack them?

This patch introduces struct pci_sysdata to x86 and x86-64, and
converts the existing two users (NUMA, Calgary) to use it.

This lays the groundwork for having other users of sysdata, such as
the PCI domains work.

Signed-off-by: Jeff Garzik <[email protected]>
Signed-off-by: Muli Ben-Yehuda <[email protected]>

---

NOTE: Calgary bits are tested; NUMA bits still need to be tested.

NOTE: I removed the PCI domains bits from Jeff's original patch so that
this patch only deals with ->sysdata users that are in mainline at
the moment. The PCI domains bits can be trivially added later.

diff -r 05804111dbee arch/i386/pci/acpi.c
--- a/arch/i386/pci/acpi.c	Tue Jun 05 11:34:51 2007 +0300
+++ b/arch/i386/pci/acpi.c	Wed Jun 06 22:49:08 2007 +0300
@@ -8,20 +8,42 @@ struct pci_bus * __devinit pci_acpi_scan
 struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int domain, int busnum)
 {
 	struct pci_bus *bus;
+	struct pci_sysdata *sd;
+	int pxm;
+
+	/* Allocate per-root-bus (not per bus) arch-specific data.
+	 * TODO: leak; this memory is never freed.
+	 * It's arguable whether it's worth the trouble to care.
+	 */
+	sd = kzalloc(sizeof(*sd), GFP_KERNEL);
+	if (!sd) {
+		printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum);
+		return NULL;
+	}
 
 	if (domain != 0) {
 		printk(KERN_WARNING "PCI: Multiple domains not supported\n");
+		kfree(sd);
 		return NULL;
 	}
 
-	bus = pcibios_scan_root(busnum);
+	sd->node = -1;
+
+	pxm = acpi_get_pxm(device->handle);
+#ifdef CONFIG_ACPI_NUMA
+	if (pxm >= 0)
+		sd->node = pxm_to_node(pxm);
+#endif
+
+	bus = pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd);
+	if (!bus)
+		kfree(sd);
+
 #ifdef CONFIG_ACPI_NUMA
 	if (bus != NULL) {
-		int pxm = acpi_get_pxm(device->handle);
 		if (pxm >= 0) {
-			bus->sysdata = (void *)(unsigned long)pxm_to_node(pxm);
-			printk("bus %d -> pxm %d -> node %ld\n",
-				busnum, pxm, (long)(bus->sysdata));
+			printk("bus %d -> pxm %d -> node %d\n",
+				busnum, pxm, sd->node);
 		}
 	}
 #endif
diff -r 05804111dbee arch/i386/pci/common.c
--- a/arch/i386/pci/common.c	Tue Jun 05 11:34:51 2007 +0300
+++ b/arch/i386/pci/common.c	Wed Jun 06 22:36:53 2007 +0300
@@ -29,12 +29,14 @@ struct pci_raw_ops *raw_pci_ops;
 
 static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
 {
-	return raw_pci_ops->read(0, bus->number, devfn, where, size, value);
+	return raw_pci_ops->read(pci_domain_nr(bus), bus->number,
+				 devfn, where, size, value);
 }
 
 static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value)
 {
-	return raw_pci_ops->write(0, bus->number, devfn, where, size, value);
+	return raw_pci_ops->write(pci_domain_nr(bus), bus->number,
+				  devfn, where, size, value);
 }
 
 struct pci_ops pci_root_ops = {
@@ -293,6 +295,7 @@ struct pci_bus * __devinit pcibios_scan_
 struct pci_bus * __devinit pcibios_scan_root(int busnum)
 {
 	struct pci_bus *bus = NULL;
+	struct pci_sysdata *sd;
 
 	dmi_check_system(pciprobe_dmi_table);
 
@@ -303,9 +306,19 @@ struct pci_bus * __devinit pcibios_scan_
 		}
 	}
 
+	/* Allocate per-root-bus (not per bus) arch-specific data.
+	 * TODO: leak; this memory is never freed.
+	 * It's arguable whether it's worth the trouble to care.
+	 */
+	sd = kzalloc(sizeof(*sd), GFP_KERNEL);
+	if (!sd) {
+		printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum);
+		return NULL;
+	}
+
 	printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum);
 
-	return pci_scan_bus_parented(NULL, busnum, &pci_root_ops, NULL);
+	return pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd);
 }
 
 extern u8 pci_cache_line_size;
diff -r 05804111dbee arch/x86_64/kernel/pci-calgary.c
--- a/arch/x86_64/kernel/pci-calgary.c	Tue Jun 05 11:34:51 2007 +0300
+++ b/arch/x86_64/kernel/pci-calgary.c	Wed Jun 06 23:27:53 2007 +0300
@@ -375,7 +375,7 @@ static inline struct iommu_table *find_i
 	else
 		pbus = pdev->bus;
 
-	tbl = pbus->self->sysdata;
+	tbl = pci_iommu(pbus);
 
 	BUG_ON(pdev->bus->parent &&
 	       (tbl->it_busno != pdev->bus->parent->number));
@@ -718,7 +718,7 @@ static void __init calgary_reserve_mem_r
 	limit++;
 
 	numpages = ((limit - start) >> PAGE_SHIFT);
-	iommu_range_reserve(dev->sysdata, start, numpages);
+	iommu_range_reserve(pci_iommu(dev->bus), start, numpages);
 }
 
 static void __init calgary_reserve_peripheral_mem_1(struct pci_dev *dev)
@@ -726,7 +726,7 @@ static void __init calgary_reserve_perip
 	void __iomem *target;
 	u64 low, high, sizelow;
 	u64 start, limit;
-	struct iommu_table *tbl = dev->sysdata;
+	struct iommu_table *tbl = pci_iommu(dev->bus);
 	unsigned char busnum = dev->bus->number;
 	void __iomem *bbar = tbl->bbar;
 
@@ -750,7 +750,7 @@ static void __init calgary_reserve_perip
 	u32 val32;
 	u64 low, high, sizelow, sizehigh;
 	u64 start, limit;
-	struct iommu_table *tbl = dev->sysdata;
+	struct iommu_table *tbl = pci_iommu(dev->bus);
 	unsigned char busnum = dev->bus->number;
 	void __iomem *bbar = tbl->bbar;
 
@@ -786,7 +786,7 @@ static void __init calgary_reserve_regio
 {
 	unsigned int npages;
 	u64 start;
-	struct iommu_table *tbl = dev->sysdata;
+	struct iommu_table *tbl = pci_iommu(dev->bus);
 	void __iomem *target;
 	u32 val1, val2;
 	unsigned char busnum = dev->bus->number;
@@ -831,7 +831,7 @@ static int __init calgary_setup_tar(stru
 	if (ret)
 		return ret;
 
-	tbl = dev->sysdata;
+	tbl = pci_iommu(dev->bus);
 	tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space;
 	tce_free(tbl, 0, tbl->it_size);
 
@@ -868,7 +868,7 @@ static void __init calgary_free_bus(stru
 static void __init calgary_free_bus(struct pci_dev *dev)
 {
 	u64 val64;
-	struct iommu_table *tbl = dev->sysdata;
+	struct iommu_table *tbl = pci_iommu(dev->bus);
 	void __iomem *target;
 	unsigned int bitmapsz;
 
@@ -883,7 +883,8 @@ static void __init calgary_free_bus(stru
 	tbl->it_map = NULL;
 
 	kfree(tbl);
-	dev->sysdata = NULL;
+	
+	set_pci_iommu(dev->bus, NULL);
 
 	/* Can't free bootmem allocated memory after system is up :-( */
 	bus_info[dev->bus->number].tce_space = NULL;
@@ -956,7 +957,7 @@ static void calgary_watchdog(unsigned lo
 static void calgary_watchdog(unsigned long data)
 {
 	struct pci_dev *dev = (struct pci_dev *)data;
-	struct iommu_table *tbl = dev->sysdata;
+	struct iommu_table *tbl = pci_iommu(dev->bus);
 	void __iomem *bbar = tbl->bbar;
 	u32 val32;
 	void __iomem *target;
@@ -1056,7 +1057,7 @@ static void __init calgary_enable_transl
 	struct iommu_table *tbl;
 
 	busnum = dev->bus->number;
-	tbl = dev->sysdata;
+	tbl = pci_iommu(dev->bus);
 	bbar = tbl->bbar;
 
 	/* dump the configuration register */
@@ -1093,7 +1094,7 @@ static void __init calgary_disable_trans
 	struct iommu_table *tbl;
 
 	busnum = dev->bus->number;
-	tbl = dev->sysdata;
+	tbl = pci_iommu(dev->bus);
 	bbar = tbl->bbar;
 
 	/* disable TCE in PHB Config Register */
@@ -1111,7 +1112,7 @@ static void __init calgary_init_one_nont
 static void __init calgary_init_one_nontraslated(struct pci_dev *dev)
 {
 	pci_dev_get(dev);
-	dev->sysdata = NULL;
+	set_pci_iommu(dev->bus, NULL);
 
 	/* is the device behind a bridge? */
 	if (dev->bus->parent)
@@ -1144,7 +1145,7 @@ static int __init calgary_init_one(struc
 	else
 		dev->bus->self = dev;
 
-	tbl = dev->sysdata;
+	tbl = pci_iommu(dev->bus);
 	tbl->chip_ops->handle_quirks(tbl, dev);
 
 	calgary_enable_translation(dev);
@@ -1541,7 +1542,7 @@ static void __init calgary_fixup_one_tce
 	unsigned int npages;
 	int i;
 
-	tbl = dev->sysdata;
+	tbl = pci_iommu(dev->bus);
 
 	for (i = 0; i < 4; i++) {
 		struct resource *r = &dev->resource[PCI_BRIDGE_RESOURCES + i];
diff -r 05804111dbee arch/x86_64/kernel/tce.c
--- a/arch/x86_64/kernel/tce.c	Tue Jun 05 11:34:51 2007 +0300
+++ b/arch/x86_64/kernel/tce.c	Wed Jun 06 22:36:53 2007 +0300
@@ -136,9 +136,9 @@ int build_tce_table(struct pci_dev *dev,
 	struct iommu_table *tbl;
 	int ret;
 
-	if (dev->sysdata) {
-		printk(KERN_ERR "Calgary: dev %p has sysdata %p\n",
-		       dev, dev->sysdata);
+	if (pci_iommu(dev->bus)) {
+		printk(KERN_ERR "Calgary: dev %p has sysdata->iommu %p\n",
+		       dev, pci_iommu(dev->bus));
 		BUG();
 	}
 
@@ -155,11 +155,7 @@ int build_tce_table(struct pci_dev *dev,
 
 	tbl->bbar = bbar;
 
-	/*
-	 * NUMA is already using the bus's sysdata pointer, so we use
-	 * the bus's pci_dev's sysdata instead.
-	 */
-	dev->sysdata = tbl;
+	set_pci_iommu(dev->bus, tbl);
 
 	return 0;
 
diff -r 05804111dbee arch/x86_64/pci/k8-bus.c
--- a/arch/x86_64/pci/k8-bus.c	Tue Jun 05 11:34:51 2007 +0300
+++ b/arch/x86_64/pci/k8-bus.c	Wed Jun 06 22:36:53 2007 +0300
@@ -59,6 +59,8 @@ fill_mp_bus_to_cpumask(void)
 				     j <= SUBORDINATE_LDT_BUS_NUMBER(ldtbus);
 				     j++) { 
 					struct pci_bus *bus;
+					struct pci_sysdata *sd;
+
 					long node = NODE_ID(nid);
 					/* Algorithm a bit dumb, but
  					   it shouldn't matter here */
@@ -67,7 +69,9 @@ fill_mp_bus_to_cpumask(void)
 						continue;
 					if (!node_online(node))
 						node = 0;
-					bus->sysdata = (void *)node;
+
+					sd = bus->sysdata;
+					sd->node = node;
 				}		
 			}
 		}
diff -r 05804111dbee include/asm-i386/pci.h
--- a/include/asm-i386/pci.h	Tue Jun 05 11:34:51 2007 +0300
+++ b/include/asm-i386/pci.h	Wed Jun 06 22:48:04 2007 +0300
@@ -3,6 +3,11 @@
 
 
 #ifdef __KERNEL__
+
+struct pci_sysdata {
+	int		node;		/* NUMA node */
+};
+
 #include <linux/mm.h>		/* for struct page */
 
 /* Can be used to override the logic in pci_scan_bus for skipping
diff -r 05804111dbee include/asm-i386/topology.h
--- a/include/asm-i386/topology.h	Tue Jun 05 11:34:51 2007 +0300
+++ b/include/asm-i386/topology.h	Wed Jun 06 22:36:53 2007 +0300
@@ -67,7 +67,7 @@ static inline int node_to_first_cpu(int 
 	return first_cpu(mask);
 }
 
-#define pcibus_to_node(bus) ((long) (bus)->sysdata)
+#define pcibus_to_node(bus) ((struct pci_sysdata *)((bus)->sysdata))->node
 #define pcibus_to_cpumask(bus) node_to_cpumask(pcibus_to_node(bus))
 
 /* sched_domains SD_NODE_INIT for NUMAQ machines */
diff -r 05804111dbee include/asm-x86_64/pci.h
--- a/include/asm-x86_64/pci.h	Tue Jun 05 11:34:51 2007 +0300
+++ b/include/asm-x86_64/pci.h	Wed Jun 06 22:48:15 2007 +0300
@@ -4,6 +4,25 @@
 #include <asm/io.h>
 
 #ifdef __KERNEL__
+
+struct pci_sysdata {
+	int		node;		/* NUMA node */
+	void*		iommu;		/* IOMMU private data */
+};
+
+#ifdef CONFIG_CALGARY_IOMMU
+static inline void* pci_iommu(struct pci_bus *bus)
+{
+	struct pci_sysdata *sd = bus->sysdata;
+	return sd->iommu;
+}
+
+static inline void set_pci_iommu(struct pci_bus *bus, void *val)
+{
+	struct pci_sysdata *sd = bus->sysdata;
+	sd->iommu = val;
+}
+#endif /* CONFIG_CALGARY_IOMMU */
 
 #include <linux/mm.h> /* for struct page */
 
diff -r 05804111dbee include/asm-x86_64/topology.h
--- a/include/asm-x86_64/topology.h	Tue Jun 05 11:34:51 2007 +0300
+++ b/include/asm-x86_64/topology.h	Wed Jun 06 22:36:53 2007 +0300
@@ -22,7 +22,7 @@ extern int __node_distance(int, int);
 #define parent_node(node)		(node)
 #define node_to_first_cpu(node) 	(first_cpu(node_to_cpumask[node]))
 #define node_to_cpumask(node)		(node_to_cpumask[node])
-#define pcibus_to_node(bus)		((long)(bus->sysdata))	
+#define pcibus_to_node(bus)	((struct pci_sysdata *)((bus)->sysdata))->node
 #define pcibus_to_cpumask(bus)		node_to_cpumask(pcibus_to_node(bus));
 
 #define numa_node_id()			read_pda(nodenumber)


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux