Re: Resend: [RFC/Patch] Robust futexes

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



You might want to CC Andrew Morton and Rusty Russell.

What is the status of the glibc side of this?

Daniel


On Tue, 2005-07-05 at 16:11 -0700, Todd Kneisel wrote:
> This is a resend of my patch to add robust futex support to the existing
> sys_futex system call. The patch applies to 2.6.12. Any comments or
> discussion will be welcome.
> 
> Changes since my last posted version:
> - Applies to 2.6.12, was 2.6.12-rc6
> - Added config option CONFIG_ROBUST_FUTEX, depends on existing CONFIG_FUTEX
>    and defaults to no.
> - Commented functions, using kernel-doc style comments
> - Cleaned up some CodingStyle violations
> 
> Sys_futex provides operations on futexes that can be local to a process, or
> shared between processes by placing the futex in shared memory. However, if
> a process terminates while it owns a locked shared futex, any other
> processes that use the same futex will hang.
> 
> With this patch, if a process terminates while it owns a locked robust
> futex, the ownership of the lock will be transferred to the next waiting
> process, the waiting process will be awakened and will receive the status
> EOWNERDEAD. If there is no waiting process at the time of termination, then
> the next process that attempts to wait will receive ownership of the futex
> and the EOWNERDEAD status. The new owner can recover the futex and unlock it,
> in which case the futex can continue to be used. If the new owner only
> unlocks the futex, then the futex becomes unrecoverable and any attempt to
> use the futex will get the status ENOTRECOVERABLE.
> 
> The patch does not change the existing sys_futex operations on non-robust
> futexes, so the patch should not affect existing code that uses futexes.
> New op codes are added to the sys_futex system call for use by code that
> requires robust futexes. I have a patch to glibc and the nptl thread
> library that uses robust futexes. We are in the process of getting
> copyright assignments to the Free Software Foundation so that we can
> submit the glibc and nptl patches.
> 
> Robust futexes have a different format from the non-robust futexes.
> The non-robust futexes can have the values 0 (unlocked), 1 (locked) or
> 2 (locked with waiters). In a robust futex the high bit indicates if there
> are processes waiting on the futex, the next bit indicates if the owning
> process died, and the next bit indicates if the futex is not recoverable.
> The rest of the futex contains the pid of the task that owns the futex lock
> or zero if the futex is not locked.
> 
> Signed-off-by: Todd Kneisel <[email protected]>
> 
>   fs/dcache.c           |    3
>   fs/inode.c            |    2
>   include/linux/fs.h    |    4
>   include/linux/futex.h |   20 +
>   init/Kconfig          |   11
>   kernel/exit.c         |    3
>   kernel/futex.c        |  686 +++++++++++++++++++++++++++++++++++++++++++++++++-
>   7 files changed, 728 insertions(+), 1 deletion(-)
> 
> 
> diff -uprN -X dontdiff linux-2.6.12/fs/dcache.c linux-2.6.12-todd/fs/dcache.c
> --- linux-2.6.12/fs/dcache.c	2005-06-17 12:48:29.000000000 -0700
> +++ linux-2.6.12-todd/fs/dcache.c	2005-06-20 10:44:40.738407891 -0700
> @@ -32,6 +32,7 @@
>   #include <linux/seqlock.h>
>   #include <linux/swap.h>
>   #include <linux/bootmem.h>
> +#include <linux/futex.h>
> 
>   /* #define DCACHE_DEBUG 1 */
> 
> @@ -158,6 +159,8 @@ repeat:
>   		return;
>   	}
> 
> +	futex_free_robust_list(dentry->d_inode);
> +
>   	/*
>   	 * AV: ->d_delete() is _NOT_ allowed to block now.
>   	 */
> diff -uprN -X dontdiff linux-2.6.12/fs/inode.c linux-2.6.12-todd/fs/inode.c
> --- linux-2.6.12/fs/inode.c	2005-06-17 12:48:29.000000000 -0700
> +++ linux-2.6.12-todd/fs/inode.c	2005-06-20 15:08:36.428029628 -0700
> @@ -21,6 +21,7 @@
>   #include <linux/pagemap.h>
>   #include <linux/cdev.h>
>   #include <linux/bootmem.h>
> +#include <linux/futex.h>
> 
>   /*
>    * This is needed for the following functions:
> @@ -202,6 +203,7 @@ void inode_init_once(struct inode *inode
>   	INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
>   	spin_lock_init(&inode->i_lock);
>   	i_size_ordered_init(inode);
> +	futex_init_inode(inode);
>   }
> 
>   EXPORT_SYMBOL(inode_init_once);
> diff -uprN -X dontdiff linux-2.6.12/include/linux/fs.h linux-2.6.12-todd/include/linux/fs.h
> --- linux-2.6.12/include/linux/fs.h	2005-06-17 12:48:29.000000000 -0700
> +++ linux-2.6.12-todd/include/linux/fs.h	2005-06-20 14:56:16.977970185 -0700
> @@ -350,6 +350,10 @@ struct address_space {
>   	spinlock_t		private_lock;	/* for use by the address_space */
>   	struct list_head	private_list;	/* ditto */
>   	struct address_space	*assoc_mapping;	/* ditto */
> +#ifdef CONFIG_ROBUST_FUTEX
> + 	struct list_head	robust_list;	/* list of robust futexes */
> + 	struct semaphore	robust_sem;	/* protect list of robust futexes */
> +#endif
>   } __attribute__((aligned(sizeof(long))));
>   	/*
>   	 * On most architectures that alignment is already the case; but
> diff -uprN -X dontdiff linux-2.6.12/include/linux/futex.h linux-2.6.12-todd/include/linux/futex.h
> --- linux-2.6.12/include/linux/futex.h	2005-06-17 12:48:29.000000000 -0700
> +++ linux-2.6.12-todd/include/linux/futex.h	2005-06-20 16:43:46.293122664 -0700
> @@ -1,6 +1,8 @@
>   #ifndef _LINUX_FUTEX_H
>   #define _LINUX_FUTEX_H
> 
> +#include <linux/fs.h>
> +
>   /* Second argument to futex syscall */
> 
> 
> @@ -9,9 +11,27 @@
>   #define FUTEX_FD (2)
>   #define FUTEX_REQUEUE (3)
>   #define FUTEX_CMP_REQUEUE (4)
> +#define FUTEX_WAIT_ROBUST (5)
> +#define FUTEX_WAKE_ROBUST (6)
> +#define FUTEX_REGISTER (7)
> +#define FUTEX_DEREGISTER (8)
> +#define FUTEX_RECOVER (9)
> 
>   long do_futex(unsigned long uaddr, int op, int val,
>   		unsigned long timeout, unsigned long uaddr2, int val2,
>   		int val3);
> 
> +#ifdef CONFIG_ROBUST_FUTEX
> +  extern void futex_free_robust_list(struct inode *inode);
> +  extern void exit_futex(void);
> +  static inline void futex_init_inode(struct inode *inode) {
> +	INIT_LIST_HEAD(&inode->i_data.robust_list);
> +	init_MUTEX(&inode->i_data.robust_sem);
> +  }
> +#else
> +  static inline void futex_free_robust_list(struct inode *inode) { }
> +  static inline void exit_futex(void) { }
> +  static inline void futex_init_inode(struct inode *inode) { }
> +#endif
> +
>   #endif
> diff -uprN -X dontdiff linux-2.6.12/init/Kconfig linux-2.6.12-todd/init/Kconfig
> --- linux-2.6.12/init/Kconfig	2005-06-17 12:48:29.000000000 -0700
> +++ linux-2.6.12-todd/init/Kconfig	2005-06-20 14:33:07.437696418 -0700
> @@ -312,6 +312,17 @@ config FUTEX
>   	  support for "fast userspace mutexes".  The resulting kernel may not
>   	  run glibc-based applications correctly.
> 
> +config ROBUST_FUTEX
> +	bool "Enable robust futex support" if EMBEDDED
> +	depends on FUTEX
> +	default n
> +	help
> +	  Enabling this option will cause the kernel to be built with support
> +	  for robust futexes. Robust futexes are an extension to futexes.
> +	  You should only enable this option if you have a specific application
> +	  that requires robust futexes, and you have a version of glibc and the
> +	  nptl thread libraries that provide robust mutexes.
> +	
>   config EPOLL
>   	bool "Enable eventpoll support" if EMBEDDED
>   	default y
> diff -uprN -X dontdiff linux-2.6.12/kernel/exit.c linux-2.6.12-todd/kernel/exit.c
> --- linux-2.6.12/kernel/exit.c	2005-06-17 12:48:29.000000000 -0700
> +++ linux-2.6.12-todd/kernel/exit.c	2005-06-20 10:46:48.983033854 -0700
> @@ -28,6 +28,7 @@
>   #include <linux/cpuset.h>
>   #include <linux/syscalls.h>
>   #include <linux/signal.h>
> +#include <linux/futex.h>
> 
>   #include <asm/uaccess.h>
>   #include <asm/unistd.h>
> @@ -813,6 +814,8 @@ fastcall NORET_TYPE void do_exit(long co
>   	group_dead = atomic_dec_and_test(&tsk->signal->live);
>   	if (group_dead)
>   		acct_process(code);
> +
> + 	exit_futex();
>   	exit_mm(tsk);
> 
>   	exit_sem(tsk);
> diff -uprN -X dontdiff linux-2.6.12/kernel/futex.c linux-2.6.12-todd/kernel/futex.c
> --- linux-2.6.12/kernel/futex.c	2005-06-17 12:48:29.000000000 -0700
> +++ linux-2.6.12-todd/kernel/futex.c	2005-06-29 17:46:05.715369816 -0700
> @@ -8,6 +8,9 @@
>    *  Removed page pinning, fix privately mapped COW pages and other cleanups
>    *  (C) Copyright 2003, 2004 Jamie Lokier
>    *
> + *  Robust futexes added by Todd Kneisel
> + *  (C) Copyright 2005, Bull HN.
> + *
>    *  Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
>    *  enough at me, Linus for the original (flawed) idea, Matthew
>    *  Kirkwood for proof-of-concept implementation.
> @@ -91,6 +94,11 @@ struct futex_q {
>   	/* For fd, sigio sent using these. */
>   	int fd;
>   	struct file *filp;
> +	
> +#ifdef CONFIG_ROBUST_FUTEX
> +	/* used when transferring ownership */
> +	pid_t waiter_pid;
> +#endif
>   };
> 
>   /*
> @@ -718,6 +726,661 @@ out:
>   	return ret;
>   }
> 
> +
> +#ifdef CONFIG_ROBUST_FUTEX
> +
> +/*
> + * Robust futexes provide a locking mechanism that can be shared between
> + * user mode processes. The major difference between robust futexes and
> + * regular futexes is that when the owner of a robust futex dies, the
> + * next task waiting on the futex will be awakened, will get ownership
> + * of the futex lock, and will receive the error status EOWNERDEAD.
> + *
> + * A robust futex is a 32 bit integer stored in user mode shared memory.
> + * Bit 31 indicates that there are tasks waiting on the futex.
> + * Bit 30 indicates that the task that owned the futex has died.
> + * Bit 29 indicates that the futex is not recoverable and cannot be used.
> + * Bits 0-28 are the pid of the task that owns the futex lock, or zero if
> + * the futex is not locked.
> + */
> +
> +#define FUTEX_WAITERS		0x80000000
> +#define FUTEX_OWNER_DIED	0x40000000
> +#define FUTEX_NOT_RECOVERABLE	0x20000000
> +#define FUTEX_PID		0x1fffffff
> +
> +/*
> + * Used to track registered robust futexes. Attached to linked list in inodes.
> + */
> +struct futex_robust {
> +	struct list_head list;
> +	union futex_key key;
> +};
> +
> +/**
> + * futex_wake_robust - wake a task that is waiting on a robust futex
> + * @uaddr: user space address of the robust futex
> + *
> + * Called from user space (through sys_futex syscall) when unlocking a
> + * robust futex, but only if %FUTEX_WAITERS is set in the futex.
> + * Unlocking when there are no waiters is done entirely in user space.
> + */
> +static int futex_wake_robust(unsigned long uaddr)
> +{
> +	union futex_key key;
> +	struct futex_hash_bucket *bh;
> +	struct list_head *head;
> +	struct futex_q *this, *next;
> +	int ret;
> +	int value;
> +	int count;
> +	struct futex_q *target;
> +
> +retry:
> +	down_read(&current->mm->mmap_sem);
> +
> +	ret = get_futex_key(uaddr, &key);
> +	if (unlikely(ret != 0))
> +		goto out;
> +
> +	bh = hash_futex(&key);
> +	spin_lock(&bh->lock);
> +
> +	ret = get_futex_value_locked(&value, (int __user *)uaddr);
> +
> +	if (unlikely(ret)) {
> +		spin_unlock(&bh->lock);
> +
> +		/* If we would have faulted, release mmap_sem, fault it in and
> +		 * start all over again.
> +		 */
> +		up_read(&current->mm->mmap_sem);
> +
> +		ret = get_user(value, (int __user *)uaddr);
> +
> +		if (!ret)
> +			goto retry;
> +		return ret;
> +	}
> +
> +	head = &bh->chain;
> +
> +	/*
> +	 * if the owner died, mark the futex as not recoverable
> +	 * and wake up all waiting tasks.
> +	 */
> +	if( value & FUTEX_OWNER_DIED ) {
> +		if (put_user( FUTEX_OWNER_DIED | FUTEX_NOT_RECOVERABLE,
> +				(int __user *) uaddr)) {
> +			ret = -EFAULT;
> +			goto out_unlock;
> +		}
> +		list_for_each_entry_safe(this, next, head, list) {
> +			if (match_futex (&this->key, &key)) {
> +				wake_futex(this);
> +				ret++;
> +			}
> +		}
> +		goto out_unlock;
> +	}
> +	
> +	/* find the first waiting task */
> +	count = 0;
> +	target = NULL;
> +	list_for_each_entry_safe(this, next, head, list) {
> +		if (match_futex (&this->key, &key)) {
> +			if (target == NULL)
> +				target = this;
> +			if (++count > 1)
> +				break;
> +		}
> +	}
> +	
> +	/* if no waiters, unlock the futex */
> +	if (count == 0) {
> +		if (put_user( 0, (int __user *) uaddr)) {
> +			ret = -EFAULT;
> +			goto out_unlock;
> +		}
> +		goto out_unlock;
> +	}
> +	
> +	/* transfer ownership and wake waiting task */
> +	value = (int)target->waiter_pid;
> +	if (count > 1)
> +		value |= FUTEX_WAITERS;
> +	if (put_user( value, (int __user *) uaddr)) {
> +		ret = -EFAULT;
> +		goto out_unlock;
> +	}
> +	wake_futex(target);
> +	ret = 1;
> +
> +out_unlock:
> +	spin_unlock(&bh->lock);
> +out:
> +	up_read(&current->mm->mmap_sem);
> +	return ret;
> +}
> +
> +/**
> + * futex_wait_robust - add current task to wait queue of a robust futex
> + * @uaddr: user space address of the robust futex
> + * @time:  timeout in jiffies. zero for no timeout.
> + *
> + * Called from user space (through sys_futex syscall) when locking a
> + * robust futex. Only called if the futex is already locked by another
> + * task. Uncontended locking is done entirely in user space.
> + */
> +static int futex_wait_robust(unsigned long uaddr, unsigned long time)
> +{
> +	DECLARE_WAITQUEUE(wait, current);
> +	int ret, curval;
> +	struct futex_q q;
> +	struct futex_hash_bucket *bh;
> +
> + retry:
> +	down_read(&current->mm->mmap_sem);
> +
> +	ret = get_futex_key(uaddr, &q.key);
> +	if (unlikely(ret != 0))
> +		goto out_release_sem;
> +
> +	bh = queue_lock(&q, -1, NULL);
> +
> +	ret = get_futex_value_locked(&curval, (int __user *)uaddr);
> +
> +	if (unlikely(ret)) {
> +		queue_unlock(&q, bh);
> +
> +		/* If we would have faulted, release mmap_sem, fault it in and
> +		 * start all over again.
> +		 */
> +		up_read(&current->mm->mmap_sem);
> +
> +		ret = get_user(curval, (int __user *)uaddr);
> +
> +		if (!ret)
> +			goto retry;
> +		return ret;
> +	}
> +
> +	/*
> +	 * user mode called us because futex was owned by a task,
> +	 * but now it's not. Let user mode try again.
> +	 */
> +	if (curval == 0) {
> +		ret = -EAGAIN;
> +		queue_unlock(&q, bh);
> +		goto out_release_sem;
> +	}
> +
> +	/*
> +	 * user mode called us because futex had owner and waitflag was
> +	 * set. That's not true now, so let user mode try again
> +	 */
> +	if ((curval & FUTEX_PID) && !(curval & FUTEX_WAITERS)) {
> +		ret = -EAGAIN;
> +		queue_unlock(&q, bh);
> +		goto out_release_sem;
> +	}
> +
> +	/* if owner has died, we don't want to wait */
> +	if ((curval & FUTEX_OWNER_DIED)) {
> +		ret = -EOWNERDEAD;
> +		queue_unlock(&q, bh);
> +		goto out_release_sem;
> +	}
> +
> +	/*
> +	 * Save pid of waiting task for transferring ownership in
> +	 * futex_wake_robust(). Avoids problem where futex_wake_robust()
> +	 * runs before waiting task is added to futex wait queue.
> +	 */
> +	q.waiter_pid = current->pid;
> +	__queue_me(&q, bh);
> +
> +	/*
> +	 * Now the futex is queued and we have checked the data, we
> +	 * don't want to hold mmap_sem while we sleep.
> +	 */	
> +	up_read(&current->mm->mmap_sem);
> +
> +	/*
> +	 * There might have been scheduling since the queue_me(), as we
> +	 * cannot hold a spinlock across the get_user() in case it
> +	 * faults, and we cannot just set TASK_INTERRUPTIBLE state when
> +	 * queueing ourselves into the futex hash.  This code thus has to
> +	 * rely on the futex_wake() code removing us from hash when it
> +	 * wakes us up.
> +	 */
> +
> +	/* add_wait_queue is the barrier after __set_current_state. */
> +	__set_current_state(TASK_INTERRUPTIBLE);
> +	add_wait_queue(&q.waiters, &wait);
> +	/*
> +	 * !list_empty() is safe here without any lock.
> +	 * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
> +	 */
> +	if (likely(!list_empty(&q.list)))
> +		time = schedule_timeout(time);
> +	__set_current_state(TASK_RUNNING);
> +
> +	/*
> +	 * NOTE: we don't remove ourselves from the waitqueue because
> +	 * we are the only user of it.
> +	 */
> +
> +retry2:
> +	ret = get_futex_value_locked(&curval, (int __user *)uaddr);
> +
> +	if (unlikely(ret)) {
> +		/* If we would have faulted, release mmap_sem, fault it in and
> +		 * start all over again.
> +		 */
> +		up_read(&current->mm->mmap_sem);
> +
> +		ret = get_user(curval, (int __user *)uaddr);
> +
> +		if (!ret) {
> +			down_read(&current->mm->mmap_sem);
> +			goto retry2;
> +		}
> +		unqueue_me(&q);
> +		return ret;
> +	}
> +
> +	if (curval & FUTEX_OWNER_DIED) {
> +		unqueue_me(&q);
> +		return -EOWNERDEAD;
> +	}
> +
> +	/* If we were woken (and unqueued), we succeeded, whatever. */
> +	if (!unqueue_me(&q))
> +		return 0;
> +	if (time == 0)
> +		return -ETIMEDOUT;
> +	/* We expect signal_pending(current), but another thread may
> +	 * have handled it for us already. */
> +	return -EINTR;
> +
> + out_release_sem:
> +	up_read(&current->mm->mmap_sem);
> +	return ret;
> +}
> +
> +/**
> + * futex_free_robust_list - release the list of registered futexes.
> + * @inode: inode that may be a memory mapped file
> + *
> + * Called from dput() when a dentry reference count reaches zero.
> + * If the dentry is associated with a memory mapped file, then
> + * release the list of registered robust futexes that are contained
> + * in that mapping.
> + */
> +void futex_free_robust_list(struct inode *inode)
> +{
> +	struct address_space *mapping;
> +	struct list_head *head;
> +	struct futex_robust *this, *next;
> +
> +	if (inode == NULL)
> +		return;
> +	
> +	mapping = inode->i_mapping;
> +	if (mapping == NULL)
> +		return;
> +
> +	if (list_empty(&mapping->robust_list))
> +		return;
> +		
> +	down(&mapping->robust_sem);
> +	
> +	head = &mapping->robust_list;
> +
> +	list_for_each_entry_safe(this, next, head, list) {
> +		list_del(&this->list);
> +		kfree(this);
> +	}
> +
> +	up(&mapping->robust_sem);
> +	return;
> +}
> +
> +/**
> + * get_private_uaddr - convert a private futex_key to a user addr
> + * @key: the futex_key that identifies a futex.
> + *
> + * Private futex_keys identify a futex that is in non-shared memory.
> + * Robust futexes should never result in private futex_keys, but keep
> + * this code for completeness.
> + * Returns zero if futex is not contained in current task's mm
> + */
> +static unsigned long get_private_uaddr( union futex_key *key)
> +{
> +	unsigned long uaddr = 0;
> +
> +	if (key->private.mm == current->mm)
> +		uaddr = key->private.uaddr;
> +	return uaddr;
> +}
> +
> +/**
> + * get_shared_uaddr - convert a shared futex_key to a user addr.
> + * @key: a futex_key that identifies a futex.
> + * @vma: a vma that may contain the futex
> + *
> + * Shared futex_keys identify a futex that is contained in a vma,
> + * and so may be shared.
> + * Returns zero if futex is not contained in @vma
> + */
> +static unsigned long get_shared_uaddr( union futex_key *key,
> +				       struct vm_area_struct *vma)
> +{
> +	unsigned long uaddr = 0;
> +	unsigned long tmpaddr;
> +	struct address_space *mapping;
> +
> +	mapping = vma->vm_file->f_mapping;
> +	if (key->shared.inode == mapping->host ) {
> +		tmpaddr = ((key->shared.pgoff - vma->vm_pgoff) << PAGE_SHIFT)
> +				+ (key->shared.offset & ~0x1)
> +				+ vma->vm_start;
> +		if (tmpaddr >= vma->vm_start && tmpaddr < vma->vm_end)
> +			uaddr = tmpaddr;
> +	}
> +	
> +	return uaddr;
> +}
> +
> +/**
> + * get_futex_uaddr - convert a futex_key to a user addr.
> + * @key: futex_key that identifies a futex
> + * @vma: vma that may contain the futex
> + *
> + * Converts both shared and private futex_keys.
> + * Returns zero if futex is not contained in @vma or in the current
> + * task's mm.
> + */
> +static unsigned long get_futex_uaddr( union futex_key *key,
> +				      struct vm_area_struct *vma)
> +{
> +	unsigned long uaddr;
> +
> +	if ((key->both.offset & 0x1) == 0)
> +		uaddr = get_private_uaddr(key);
> +	else
> +		uaddr = get_shared_uaddr(key,vma);
> +	
> +	return uaddr;
> +}
> +
> +/**
> + * set_owner_died - mark futex when owner dies, then wake a waiting task
> + * @key: futex_key that identifies the futex that is owned by the
> + *       current task.
> + * @uaddr: user space address of the futex.
> + * @value: the current value of the futex.
> + *
> + * Set the %FUTEX_OWNER_DIED flag in the futex, then find the first task
> + * that is waiting on this futex and that is not part of the current
> + * thread group, and wake that task.
> + */
> +static void set_owner_died(union futex_key *key, unsigned long uaddr,
> +			   int value)
> +{
> +	struct futex_hash_bucket *bh;
> +	struct list_head *head;
> +	struct futex_q *this, *next;
> +	wait_queue_t *waitq;
> +	struct list_head *waitq_list;
> +	struct task_struct *task;
> +	int ret;
> +
> +	bh = hash_futex(key);
> +	spin_lock(&bh->lock);
> +	head = &bh->chain;
> +
> +	ret = put_user(FUTEX_OWNER_DIED | value, (int __user *) uaddr);
> +	if (ret != 0) {
> +		spin_unlock(&bh->lock);
> +		WARN_ON(ret!=0);
> +		return;
> +	}
> +
> +	list_for_each_entry_safe(this, next, head, list) {
> +		if (!match_futex (&this->key, key))
> +			continue;
> +
> +		waitq_list = this->waiters.task_list.next;
> +		waitq = list_entry(waitq_list, wait_queue_t, task_list);
> +		task = waitq->task;
> +		if (task->tgid == current->tgid)
> +			continue;
> +
> +		wake_futex(this);
> +		break;
> +	}
> +	spin_unlock(&bh->lock);
> +}
> +
> +/**
> + * find_owned_futex - find futexes owned by the current task
> + * @vma: the vma to search for futexes
> + *
> + * Walk the list of registered robust futexes for this @vma,
> + * setting the %FUTEX_OWNER_DIED flag on those futexes owned
> + * by the current, exiting task.
> + */
> +static void find_owned_futex( struct vm_area_struct *vma )
> +{
> +	struct address_space *mapping;
> +	struct list_head *head;
> +	struct futex_robust *this, *next;
> +	unsigned long uaddr;
> +	int value;
> +	int ret;
> +
> +	mapping = vma->vm_file->f_mapping;
> +	down(&mapping->robust_sem);
> +
> +	head = &mapping->robust_list;
> +	list_for_each_entry_safe(this, next, head, list) {
> +
> +		uaddr = get_futex_uaddr(&this->key, vma);
> +		if (uaddr == 0)
> +			continue;
> +			
> +		if ((ret = get_user(value, (int *)uaddr)) != 0) {
> +			WARN_ON(ret!=0);
> +			continue;
> +		}
> +		
> +		if ((value & FUTEX_PID) == current->pid)
> +			set_owner_died(&this->key,uaddr,value);
> +	}
> +
> +	up(&mapping->robust_sem);
> +}
> +
> +/**
> + * exit_futex - futex processing when a task exits.
> + *
> + * Called from do_exit() when a task exits. Mark all robust futexes
> + * that are owned by the current terminating task as %FUTEX_OWNER_DIED.
> + */
> +
> +void exit_futex(void)
> +{
> +	struct mm_struct *mm;
> +	struct vm_area_struct *vma;
> +
> +	if (current==NULL)
> +		return;
> +
> +	mm = current->mm;
> +	if (mm==NULL)
> +		return;
> +
> +	down_read(&mm->mmap_sem);
> +	
> +	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
> +		if (vma->vm_file == NULL)
> +			continue;
> +			
> +		if (vma->vm_file->f_mapping == NULL)
> +			continue;
> +
> +		find_owned_futex(vma);
> +	}
> +
> +	up_read(&mm->mmap_sem);
> +}
> +
> +/**
> + * futex_register - Record the existence of a robust futex in a vma.
> + * @uaddr: user space address of the robust futex
> + *
> + * Called from user space (through sys_futex syscall) when a robust
> + * futex is created. Looks up the vma that contains the futex and
> + * adds an entry to the list of all robust futexes in the vma.
> + */
> +static int futex_register(unsigned long uaddr)
> +{
> +	int ret;
> +	struct futex_robust *robust;
> +	struct mm_struct *mm = current->mm;
> +	struct vm_area_struct *vma;
> +	struct file *file;
> +	struct address_space *mapping;
> +	
> +	robust = kmalloc(sizeof(*robust), GFP_KERNEL);
> +	if (!robust) {
> +		ret = -ENOMEM;
> +		goto out;
> +	}
> +	
> +	down_read(&current->mm->mmap_sem);
> +
> +	ret = get_futex_key(uaddr, &robust->key);
> +	if (unlikely(ret != 0))	{
> +		up_read(&current->mm->mmap_sem);
> +		kfree(robust);
> +		goto out;
> +	}
> +
> +	up_read(&current->mm->mmap_sem);
> +
> +	vma = find_extend_vma(mm, uaddr);
> +	if (unlikely(!vma)) {
> +		ret = -EFAULT;
> +		kfree(robust);
> +		goto out;
> +	}
> +	
> +	file = vma->vm_file;
> +	if (!file) {
> +		ret = -EINVAL;
> +		kfree(robust);
> +		goto out;
> +	}
> +	
> +	mapping = file->f_mapping;
> +	down(&mapping->robust_sem);
> +	list_add_tail(&robust->list, &mapping->robust_list);
> +	up(&mapping->robust_sem);
> +
> +out:
> +	return ret;
> +}
> +
> +/**
> + * futex_deregister - Delete robust futex registration from a vma
> + * @uaddr: user space address of the robust futex
> + *
> + * Called from user space (through sys_futex syscall) when a robust
> + * futex is destroyed. Looks up the vma that contains the futex and
> + * removes the futex entry from the list of all robust futexes in
> + * the vma.
> + */
> +static int futex_deregister(unsigned long uaddr)
> +{
> +	union futex_key key;
> +	struct mm_struct *mm = current->mm;
> +	struct vm_area_struct *vma;
> +	struct file *file;
> +	struct address_space *mapping;
> +	struct list_head *head;
> +	struct futex_robust *this, *next;
> +	int ret;
> +
> +	down_read(&mm->mmap_sem);
> +
> +	ret = get_futex_key(uaddr, &key);
> +	if (unlikely(ret != 0))
> +		goto out;
> +
> +	vma = find_extend_vma(mm, uaddr);
> +	if (unlikely(!vma)) {
> +		ret = -EFAULT;
> +		goto out;
> +	}
> +	
> +	file = vma->vm_file;
> +	if (!file) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +	
> +	mapping = file->f_mapping;
> +	down(&mapping->robust_sem);
> +	head = &mapping->robust_list;
> +
> +	list_for_each_entry_safe(this, next, head, list) {
> +		if (match_futex (&this->key, &key)) {
> +			list_del(&this->list);
> +			kfree(this);
> +			break;
> +		}
> +	}
> +
> +	up(&mapping->robust_sem);
> +out:
> +	up_read(&mm->mmap_sem);
> +	return ret;
> +}
> +
> +/**
> + * futex_recover - Recover a futex after its owner died
> + * @uaddr: user space address of the robust futex
> + *
> + * Called from user space (through sys_futex syscall).
> + * When a task dies while owning a robust futex, the futex is
> + * marked with %FUTEX_OWNER_DIED and ownership is transferred
> + * to the next waiting task. That task can choose to restore
> + * the futex to a useful state by calling this function.
> + */
> +static int futex_recover(unsigned long uaddr)
> +{
> +	int ret = 0;
> +	int value;
> +
> +	down_read(&current->mm->mmap_sem);
> +
> +	if ((ret = get_user(value, (int *)uaddr)) != 0)
> +		goto out_release_sem;
> +	
> +	value &= ~FUTEX_OWNER_DIED;
> +	ret = put_user(value, (int *)uaddr);
> +
> + out_release_sem:
> +	up_read(&current->mm->mmap_sem);
> +	return ret;
> +}
> +
> +#endif /* #ifdef CONFIG_ROBUST_FUTEX */
> +
> +
>   long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout,
>   		unsigned long uaddr2, int val2, int val3)
>   {
> @@ -740,6 +1403,23 @@ long do_futex(unsigned long uaddr, int o
>   	case FUTEX_CMP_REQUEUE:
>   		ret = futex_requeue(uaddr, uaddr2, val, val2, &val3);
>   		break;
> +#ifdef CONFIG_ROBUST_FUTEX
> +	case FUTEX_WAIT_ROBUST:
> +		ret = futex_wait_robust(uaddr, timeout);
> +		break;
> +	case FUTEX_WAKE_ROBUST:
> +		ret = futex_wake_robust(uaddr);
> +		break;
> +	case FUTEX_REGISTER:
> +		ret = futex_register(uaddr);
> +		break;
> +	case FUTEX_DEREGISTER:
> +		ret = futex_deregister(uaddr);
> +		break;
> +	case FUTEX_RECOVER:
> +		ret = futex_recover(uaddr);
> +		break;
> +#endif
>   	default:
>   		ret = -ENOSYS;
>   	}
> @@ -755,7 +1435,11 @@ asmlinkage long sys_futex(u32 __user *ua
>   	unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
>   	int val2 = 0;
> 
> +#ifdef CONFIG_ROBUST_FUTEX
> +	if ((op == FUTEX_WAIT || op == FUTEX_WAIT_ROBUST) && utime) {
> +#else
>   	if ((op == FUTEX_WAIT) && utime) {
> +#endif
>   		if (copy_from_user(&t, utime, sizeof(t)) != 0)
>   			return -EFAULT;
>   		timeout = timespec_to_jiffies(&t) + 1;
> @@ -763,7 +1447,7 @@ asmlinkage long sys_futex(u32 __user *ua
>   	/*
>   	 * requeue parameter in 'utime' if op == FUTEX_REQUEUE.
>   	 */
> -	if (op >= FUTEX_REQUEUE)
> +	if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE)
>   		val2 = (int) (unsigned long) utime;
> 
>   	return do_futex((unsigned long)uaddr, op, val, timeout,
> _______________________________________________
> robustmutexes mailing list
> [email protected]
> https://lists.osdl.org/mailman/listinfo/robustmutexes

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]
  Powered by Linux