Re: [PATCH 23/33] netvm: skb processing

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, 30 Oct 2007 17:04:24 +0100
Peter Zijlstra <[email protected]> wrote:

> In order to make sure emergency packets receive all memory needed to proceed
> ensure processing of emergency SKBs happens under PF_MEMALLOC.
> 
> Use the (new) sk_backlog_rcv() wrapper to ensure this for backlog processing.
> 
> Skip taps, since those are user-space again.
> 
> Signed-off-by: Peter Zijlstra <[email protected]>
> ---
>  include/net/sock.h |    5 +++++
>  net/core/dev.c     |   44 ++++++++++++++++++++++++++++++++++++++------
>  net/core/sock.c    |   18 ++++++++++++++++++
>  3 files changed, 61 insertions(+), 6 deletions(-)
> 
> Index: linux-2.6/net/core/dev.c
> ===================================================================
> --- linux-2.6.orig/net/core/dev.c
> +++ linux-2.6/net/core/dev.c
> @@ -1976,10 +1976,23 @@ int netif_receive_skb(struct sk_buff *sk
>  	struct net_device *orig_dev;
>  	int ret = NET_RX_DROP;
>  	__be16 type;
> +	unsigned long pflags = current->flags;
> +
> +	/* Emergency skb are special, they should
> +	 *  - be delivered to SOCK_MEMALLOC sockets only
> +	 *  - stay away from userspace
> +	 *  - have bounded memory usage
> +	 *
> +	 * Use PF_MEMALLOC as a poor mans memory pool - the grouping kind.
> +	 * This saves us from propagating the allocation context down to all
> +	 * allocation sites.
> +	 */
> +	if (skb_emergency(skb))
> +		current->flags |= PF_MEMALLOC;
>  
>  	/* if we've gotten here through NAPI, check netpoll */
>  	if (netpoll_receive_skb(skb))
> -		return NET_RX_DROP;
> +		goto out;

Why the change? doesn't gcc optimize the common exit case anyway?

>  
>  	if (!skb->tstamp.tv64)
>  		net_timestamp(skb);
> @@ -1990,7 +2003,7 @@ int netif_receive_skb(struct sk_buff *sk
>  	orig_dev = skb_bond(skb);
>  
>  	if (!orig_dev)
> -		return NET_RX_DROP;
> +		goto out;
>  
>  	__get_cpu_var(netdev_rx_stat).total++;
>  
> @@ -2009,6 +2022,9 @@ int netif_receive_skb(struct sk_buff *sk
>  	}
>  #endif
>  
> +	if (skb_emergency(skb))
> +		goto skip_taps;
> +
>  	list_for_each_entry_rcu(ptype, &ptype_all, list) {
>  		if (!ptype->dev || ptype->dev == skb->dev) {
>  			if (pt_prev)
> @@ -2017,6 +2033,7 @@ int netif_receive_skb(struct sk_buff *sk
>  		}
>  	}
>  
> +skip_taps:
>  #ifdef CONFIG_NET_CLS_ACT
>  	if (pt_prev) {
>  		ret = deliver_skb(skb, pt_prev, orig_dev);
> @@ -2029,19 +2046,31 @@ int netif_receive_skb(struct sk_buff *sk
>  
>  	if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
>  		kfree_skb(skb);
> -		goto out;
> +		goto unlock;
>  	}
>  
>  	skb->tc_verd = 0;
>  ncls:
>  #endif
>  
> +	if (skb_emergency(skb))
> +		switch(skb->protocol) {
> +			case __constant_htons(ETH_P_ARP):
> +			case __constant_htons(ETH_P_IP):
> +			case __constant_htons(ETH_P_IPV6):
> +			case __constant_htons(ETH_P_8021Q):
> +				break;

Indentation is wrong, and hard coding protocol values as spcial case
seems bad here. What about vlan's, etc?

> +			default:
> +				goto drop;
> +		}
> +
>  	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
>  	if (!skb)
> -		goto out;
> +		goto unlock;
>  	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
>  	if (!skb)
> -		goto out;
> +		goto unlock;
>  
>  	type = skb->protocol;
>  	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
> @@ -2056,6 +2085,7 @@ ncls:
>  	if (pt_prev) {
>  		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
>  	} else {
> +drop:
>  		kfree_skb(skb);
>  		/* Jamal, now you will not able to escape explaining
>  		 * me how you were going to use this. :-)
> @@ -2063,8 +2093,10 @@ ncls:
>  		ret = NET_RX_DROP;
>  	}
>  
> -out:
> +unlock:
>  	rcu_read_unlock();
> +out:
> +	tsk_restore_flags(current, pflags, PF_MEMALLOC);
>  	return ret;
>  }
>  
> Index: linux-2.6/include/net/sock.h
> ===================================================================
> --- linux-2.6.orig/include/net/sock.h
> +++ linux-2.6/include/net/sock.h
> @@ -523,8 +523,13 @@ static inline void sk_add_backlog(struct
>  	skb->next = NULL;
>  }
>  
> +extern int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
> +
>  static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
>  {
> +	if (skb_emergency(skb))
> +		return __sk_backlog_rcv(sk, skb);
> +
>  	return sk->sk_backlog_rcv(sk, skb);
>  }
>  
> Index: linux-2.6/net/core/sock.c
> ===================================================================
> --- linux-2.6.orig/net/core/sock.c
> +++ linux-2.6/net/core/sock.c
> @@ -319,6 +319,24 @@ int sk_clear_memalloc(struct sock *sk)
>  }
>  EXPORT_SYMBOL_GPL(sk_clear_memalloc);
>  
> +#ifdef CONFIG_NETVM
> +int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
> +{
> +	int ret;
> +	unsigned long pflags = current->flags;
> +
> +	/* these should have been dropped before queueing */
> +	BUG_ON(!sk_has_memalloc(sk));
> +
> +	current->flags |= PF_MEMALLOC;
> +	ret = sk->sk_backlog_rcv(sk, skb);
> +	tsk_restore_flags(current, pflags, PF_MEMALLOC);
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL(__sk_backlog_rcv);
> +#endif
> +
>  static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
>  {
>  	struct timeval tv;


I am still not convinced that this solves the problem well enough
to be useful.  Can you really survive a heavy memory overcommit?
In other words, can you prove that the added complexity causes the system
to survive a real test where otherwise it would not?


-- 
Stephen Hemminger <[email protected]>

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux