Re: [PATCH 2/3] x86-64: Calgary IOMMU - Calgary specific bits

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Monday 20 March 2006 09:54, Muli Ben-Yehuda wrote:

> +/* register offsets inside the PHB space */


Call it host bridge here.

> +void* tce_table_kva[MAX_NUM_OF_PHBS * MAX_NUM_NODES];
> +unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED;

Why are these not static too?

> +static int translate_empty_slots __read_mostly = 0;
> +static int calgary_detected __read_mostly = 0;
> +
> +static void tce_cache_blast(struct iommu_table *tbl);
> +
> +/* enable this to stress test the chip's TCE cache */
> +#undef BLAST_TCE_CACHE_ON_UNMAP

This should be probably hooked into iommu=debug / CONFIG_IOMMU_DEBUG

> +static inline int valid_dma_direction(int dma_direction)
> +{
> +	return ((dma_direction == DMA_BIDIRECTIONAL) ||
> +		(dma_direction == DMA_TO_DEVICE) ||
> +		(dma_direction == DMA_FROM_DEVICE));
> +}

Don't we already check that in the callers?


> +
> +	offset = find_next_zero_string(tbl->it_map, tbl->it_hint,
> +				       tbl->it_size, npages);
> +	if (offset == ~0UL) {
> +		tce_cache_blast(tbl);
> +		offset = find_next_zero_string(tbl->it_map, 0,
> +					       tbl->it_size, npages);
> +		if (offset == ~0UL)
> +			panic("Calgary: IOMMU full, fix the allocator.\n");

This should properly error out, not panic.


> +static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *vaddr,
> +	unsigned int npages, int direction)
> +{
> +	unsigned long entry, flags;
> +	dma_addr_t ret = bad_dma_address;
> +
> +	spin_lock_irqsave(&tbl->it_lock, flags);
> +
> +	entry = iommu_range_alloc(tbl, npages);
> +
> +	ret = entry << PAGE_SHIFT; /* set the return dma address */
> +
> +	if (unlikely(ret == bad_dma_address))
> +		goto error;
> +
> +	/* put the TCEs in the HW table */
> +	tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK,
> +		  direction);
> +	/* make sure HW sees the new TCEs */
> +	mb();
> +
> +	spin_unlock_irqrestore(&tbl->it_lock, flags);
> +
> +	return ret;

Does this imply vaddr is always page aligned? Otherwise you would need
to add the offset into the page.


> +	BUG_ON(!tbl);
> +
> +	spin_lock_irqsave(&tbl->it_lock, flags);

That is a useless BUG_ON. More occurrences.

> +	/* make sure updates are seen by hardware */
> +	mb();

Doesn't make sense on x86-64.


> +	dma_handle = iommu_alloc(tbl, vaddr, npages, direction);
> +	if (dma_handle != bad_dma_address)
> +		dma_handle |= (uaddr & ~PAGE_MASK);

Would be cleaner to do it in iommu_alloc.



> +
> +static inline unsigned long split_queue_offset(unsigned char num)
etc.

Looks like these all should be array lookups.
> +
> +	/* avoid the BIOS/VGA first 640KB-1MB region */
> +	start = (640 * 1024);
> +	npages = ((1024 - 640) * 1024) >> PAGE_SHIFT;
> +	iommu_range_reserve(tbl, start, npages);

Why only those and not the other PCI holes? And why at all?


> +static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar)
> +{
> +	u64 val64;
> +	u64 table_phys;
> +	void __iomem *target;
> +	int ret;
> +	struct iommu_table *tbl;
> +
> +	/* build TCE tables for each PHB */
> +	ret = build_tce_table(dev, bbar);
> +	if (ret)
> +		goto done;

That's a useless goto.

>
> +done:
> +	return ret;
> +}
> +
> +static void __init calgary_free_tar(struct pci_dev *dev)
> +{
> +	u64 val64;
> +	struct iommu_table *tbl = dev->sysdata;
> +	void __iomem *target;
> +
> +	target = calgary_reg(tbl->bbar, tar_offset(dev->bus->number));
> +	val64 = be64_to_cpu(readq(target));
> +	val64 &= ~TAR_SW_BITS;
> +	writeq(cpu_to_be64(val64), target);
> +	readq(target); /* flush */
> +
> +	kfree(tbl);
> +	dev->sysdata = NULL;

We already use dev->sysdata for the NUMA topology information from ACPI.
I think that will conflict. You need to define a new structure for 
all users.

> +	/*
> +	 * FE0MB-8MB*OneBasedChassisNumber+1MB*(RioNodeId-ChassisBase)
> +	 * ChassisBase is always zero for x366/x260/x460
> +	 * RioNodeId is 2 for first Calgary, 3 for second Calgary
> +	 */
> +	address = 0xfe000000 - (0x800000 * (1 + dev->bus->number / 15)) +
> +		(0x100000) * (nodeid - 0);


Nasty magic numbers all over.

> +	return address;
> +}
> +
> +static int __init calgary_init_one(struct pci_dev *dev)
> +{
> +	u32 address;
> +	void __iomem *bbar;
> +	int ret;
> +
> +	address = locate_register_space(dev);
> +	/* map entire 1MB of Calgary config space */
> +	bbar = ioremap(address, 1024 * 1024);

ioremap_nocache

Where exactly is the isolation enabled?

> +	printk(KERN_INFO "PCI-DMA: detecting Calgary chipset...\n");

I want it to be quiet if Calgary is not there please.

> +	for (bus = 0, calgary = 0;
> +	     bus < MAX_NUM_OF_PHBS * num_online_nodes() * 2;
> +	     bus++) {

Yuck another full scan. We're nearly over the threshold where whoever
adds more of these has to write a proper function to encapsulate and possibly
cache all that.

> +		BUG_ON(bus >= MAX_PHB_BUS_NUM * MAX_NUM_NODES);
> +		if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY)
> +			continue;

> +		printk(KERN_ERR "PCI-DMA: Calgary init failed %x, "
> +		       "falling back to no_iommu\n", ret);

Shouldn't it rather fall back to swiotlb?

> +		if (end_pfn > MAX_DMA32_PFN)
> +			printk(KERN_ERR "WARNING more than 4GB of memory, "
> +					"32bit PCI may malfunction.\n");
> +		return ret;
> +	}
> +
> +	force_iommu = 1;

Why that?

> +	dma_ops = &calgary_dma_ops;
> +
> +	return ret;
> +}
> +
> +void __init calgary_parse_options(char *p)
> +{
> +	while (*p) {
> +		if (!strncmp(p, "64k", 3))
> +			specified_table_size = TCE_TABLE_SIZE_64K;
> +		else if (!strncmp(p, "128k", 4))
> +			specified_table_size = TCE_TABLE_SIZE_128K;
> +		else if (!strncmp(p, "256k", 4))
> +			specified_table_size = TCE_TABLE_SIZE_256K;
> +		else if (!strncmp(p, "512k", 4))
> +			specified_table_size = TCE_TABLE_SIZE_512K;
> +		else if (!strncmp(p, "1M", 2))
> +			specified_table_size = TCE_TABLE_SIZE_1M;
> +		else if (!strncmp(p, "2M", 2))
> +			specified_table_size = TCE_TABLE_SIZE_2M;
> +		else if (!strncmp(p, "4M", 2))
> +			specified_table_size = TCE_TABLE_SIZE_4M;
> +		else if (!strncmp(p, "8M", 2))
> +			specified_table_size = TCE_TABLE_SIZE_8M;

You forgot to document all these in Documentation/x86_64


> +	tce_free(tbl, 0, tbl->it_size);
> +	mb(); /* flush TCE table update */

I don't think it will do anything on x86-64


> +	p = __alloc_bootmem_low(size, size, 0);
> +	if (!p)
> +		printk(KERN_ERR "Calgary: cannot allocate TCE table of "
> +		       "size 0x%x\n", size);

It won't - it will panic. I have a patch to add alloc_bootmem_nopanic queued
though. You should use that later.

I would like to see a printk and some comments about the full isolation
because it's a big change. How does it interact with the X server, for one?

The patch is not self-contained because it doesn't have a Kconfig entry?

-Andi
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux