Re: [patch] mspec driver for 2.6.12-rc2-mm3

On Tue, Apr 12, 2005 at 10:50:08AM -0400, Jes Sorensen wrote:
> +config MSPEC
> +	tristate "Special Memory support"
> +	select GENERIC_ALLOCATOR

should depend on IA64_GENERIC || IA64_SGI_SN2 because it's using sn2
functions like bte_copy

> +#define BTE_ZERO_BLOCK(_maddr, _len) \
> +	bte_copy(0, _maddr - __IA64_UNCACHED_OFFSET, _len, BTE_WACQUIRE | BTE_ZERO_FILL, NULL)

should become an line function.

> +static int fetchop_mmap(struct file *file, struct vm_area_struct *vma);
> +static int cached_mmap(struct file *file, struct vm_area_struct *vma);
> +static int uncached_mmap(struct file *file, struct vm_area_struct *vma);
> +static void mspec_open(struct vm_area_struct *vma);
> +static void mspec_close(struct vm_area_struct *vma);
> +static struct page * mspec_nopage(struct vm_area_struct *vma,
> +					unsigned long address, int *unused);

please try to reorder the code to avoid forward-prototypes where easily
possible.

> +/*
> + * There is one of these structs per node. It is used to manage the mspec
> + * space that is available on the node. Current assumption is that there is
> + * only 1 mspec block of memory per node.
> + */
> +struct node_mspecs {
> +	long		maddr;		/* phys addr of start of mspecs. */
> +	int		count;		/* Total number of mspec pages. */
> +	atomic_t	free;		/* Number of pages currently free. */
> +	unsigned long	bits[1];	/* Bitmap for managing pages. */

Please use the bits[0] gcc extensions so all the calculations for the
variable sized array easily make sense.

> +/*
> + * One of these structures is allocated when an mspec region is mmaped. The
> + * structure is pointed to by the vma->vm_private_data field in the vma struct. 
> + * This structure is used to record the addresses of the mspec pages.
> + */
> +struct vma_data {
> +	atomic_t	refcnt;		/* Number of vmas sharing the data. */
> +	spinlock_t	lock;		/* Serialize access to the vma. */
> +	int		count;		/* Number of pages allocated. */
> +	int		type;		/* Type of pages allocated. */
> +	unsigned long	maddr[1];	/* Array of MSPEC addresses. */

dito

> +};
> +
> +
> +/*
> + * Memory Special statistics.
> + */
> +struct mspec_stats {
> +	atomic_t	map_count;	/* Number of active mmap's */
> +	atomic_t	pages_in_use;	/* Number of mspec pages in use */
> +	unsigned long	pages_total;	/* Total number of mspec pages */
> +};
> +
> +static struct mspec_stats	mspec_stats;
> +static struct node_mspecs	*node_mspecs[MAX_NUMNODES];
> +
> +#define MAX_UNCACHED_GRANULES	5
> +static int allocated_granules;
> +
> +struct gen_pool *mspec_pool[MAX_NUMNODES];

> +static unsigned long
> +mspec_get_new_chunk(struct gen_pool *poolp)
> +{
> +	struct page *page;
> +	void *tmp;
> +	int status, node, i;
> +	unsigned long addr;
> +
> +	if (allocated_granules >= MAX_UNCACHED_GRANULES)
> +		return 0;
> +
> +	node = (int)poolp->private;

maybe the private data in the genpool should be an union of
void * and unsigned long so we can avoid all those casrs?

> +	page = alloc_pages_node(node, GFP_KERNEL,
> +				IA64_GRANULE_SHIFT-PAGE_SHIFT);

> +	tmp = page_address(page);
> +	memset(tmp, 0, IA64_GRANULE_SIZE);

shouldn't you just use __GFP_ZERO?

> +#if DEBUG
> +	printk(KERN_INFO "pal_prefetch_visibility() returns %i on cpu %i\n",
> +	       status, get_cpu());
> +#endif

same dprintk comment as for genalloc.

> +	vdata->lock = SPIN_LOCK_UNLOCKED;

you're supposed to use spin_lock_init() these days.

> +/*
> + * mspec_get_one_pte
> + *
> + * Return the pte for a given mm and address.
> + */
> +static __inline__ int
> +mspec_get_one_pte(struct mm_struct *mm, u64 address, pte_t **pte)
> +{
> +	pgd_t *pgd;
> +	pmd_t *pmd;
> +	pud_t *pud;
> +
> +	pgd = pgd_offset(mm, address);
> +	if (pgd_present(*pgd)) {
> +		pud = pud_offset(pgd, address);
> +		if (pud_present(*pud)) {
> +			pmd = pmd_offset(pud, address);
> +			if (pmd_present(*pmd)) {
> +				*pte = pte_offset_map(pmd, address);
> +				if (pte_present(**pte)) {
> +					return 0;
> +				}
> +			}
> +		}
> +	}
> +
> +	return -1;
> +}

> +	spin_lock(&vdata->lock);
> +
> +	index = (address - vma->vm_start) >> PAGE_SHIFT;
> +	if (vdata->maddr[index] == 0) {
> +		vdata->count++;
> +		maddr = mspec_alloc_page(numa_node_id(), vdata->type);

this looks like a page allocation under spinlock.

> +	/*
> +	 * The kernel requires a page structure to be returned upon
> +	 * success, but there are no page structures for low granule pages.
> +	 * remap_page_range() creates the pte for us and we return a
> +	 * bogus page back to the kernel fault handler to keep it happy
> +	 * (the page is freed immediately there).
> +	 */

Please don't use the ->nopage approach thenm but do remap_pfn_range
beforehand.  ->nopage is very nice if the region is actually backed by
normal RAM, but what you're doing doesn't make much sense.

> +/*
> + * Walk the EFI memory map to pull out leftover pages in the lower
> + * memory regions which do not end up in the regular memory map and
> + * stick them into the uncached allocator
> + */
> +static void __init
> +mspec_walk_efi_memmap_uc (void)

I'm pretty sure this was NACKed on the ia64 list, and SGI was told to do
a more generic efi memmap walk.

> +	/*
> +	 * The fetchop device only works on SN2 hardware, uncached and cached
> +	 * memory drivers should both be valid on all ia64 hardware
> +	 */

In which case my above comment about the depency doesn't make sense, but
you'll have to split the driver into separate files or add ifdefs.  Please
test it on some non-sgi hardware with a non-generic kernel build.

> +	printk(KERN_INFO "%s: v%s\n", FETCHOP_DRIVER_ID_STR, REVISION);
> +	printk(KERN_INFO "%s: v%s\n", CACHED_DRIVER_ID_STR, REVISION);
> +	printk(KERN_INFO "%s: v%s\n", UNCACHED_DRIVER_ID_STR, REVISION);

Please keep the noise level down and remove these.

> +unsigned long
> +mspec_kalloc_page(int nid)
> +{
> +	return TO_AMO(mspec_alloc_page(nid, MSPEC_FETCHOP));
> +}
> +
> +
> +void
> +mspec_kfree_page(unsigned long maddr)
> +{
> +	mspec_free_page(TO_PHYS(maddr) + __IA64_UNCACHED_OFFSET);
> +}
> +EXPORT_SYMBOL(mspec_kalloc_page);
> +EXPORT_SYMBOL(mspec_kfree_page);

What is this for?  these look like really odd APIs.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Follow-Ups:
- Re: [patch] mspec driver for 2.6.12-rc2-mm3
  - From: Jes Sorensen <[email protected]>

References:
- [patch] mspec driver for 2.6.12-rc2-mm3
  - From: [email protected] (Jes Sorensen)
- Re: [patch] mspec driver for 2.6.12-rc2-mm3
  - From: Andrew Morton <[email protected]>
- Re: [patch] mspec driver for 2.6.12-rc2-mm3
  - From: Jes Sorensen <[email protected]>

Prev by Date: Re: [Crosspost] GNU/Linux userland?
Next by Date: Re: Re: more git updates..
Previous by thread: Re: [patch] mspec driver for 2.6.12-rc2-mm3
Next by thread: Re: [patch] mspec driver for 2.6.12-rc2-mm3
Index(es):
- Date
- Thread

[Index of Archives] [Kernel Newbies] [Netfilter] [Bugtraq] [Photo] [Stuff] [Gimp] [Yosemite News] [MIPS Linux] [ARM Linux] [Linux Security] [Linux RAID] [Video 4 Linux] [Linux for the blind] [Linux Resources]