David S. Miller a écrit :
From: Andrew Morton <[email protected]>
Date: Fri, 23 Sep 2005 00:10:13 -0700
Ravikiran G Thirumalai <[email protected]> wrote:
Hi Andrew,
This patchset contains alloc_percpu + bigrefs + bigrefs for netdevice
refcount. This patchset improves tbench lo performance by 6% on a 8 way IBM
x445.
I think we'd need more comprehensive benchmarks than this before adding
large amounts of complex core code.
I agree. tbench over loopback is about as far from real life
as it gets.
I'm still against expanding these networking datastructures with
bigrefs just for this stuff. Some people have per-cpu and per-node on
the brain, and it's starting to bloat things up a little bit too much.
Hi David
I agree with you about bigref might be a bit of over-engineering for netdevice
refcount.
I sent one month ago a patch to reorder 'struct net_device' to at least make
it SMP friendly, with no per-cpu or per-node additional memory but got no comment.
It's important to place mostly read parts together, so that a cache lines can
be shared by CPUS. It's important to separate hot fields in three different
cache lines to that a cpu can work on the transmit while another is receiving
frames without paying cache line ping pongs.
Please (re)consider this patch since it really reduce CPU load and/or memory
bus trafic between CPUS.
I ask this question :
- Is the following comment found in netdevice.h still true ?
/*
* This is the first field of the "visible" part of this structure
* (i.e. as seen by users in the "Space.c" file). It is the name
* the interface.
*/
If it is still true, then I suspect name_hlist cannot be moved like I did.
Thank you
- [NET] : Reorder some hot fields of struct net_device and place them on
separate cache lines in SMP to lower memory bouncing between multiple CPU
accessing the device.
- One part is mostly used on receive path (including eth_type_trans())
(poll_list, poll, quota, weight, last_rx, dev_addr, broadcast)
- One part is mostly used on queue transmit path (qdisc)
(queue_lock, qdisc, qdisc_sleeping, qdisc_list, tx_queue_len)
- One part is mostly used on xmit path (device)
(xmit_lock, xmit_lock_owner, priv, hard_start_xmit, trans_start)
'features' is placed outside of these hot points, in a location that may be
shared by all cpus (because mostly read)
Minor but maybe not possible because of ABI :
name_hlist is moved close to name[IFNAMSIZ] to speedup __dev_get_by_name()
Signed-off-by: Eric Dumazet <[email protected]>
--- linux-2.6.14-rc2/include/linux/netdevice.h 2005-09-20 05:00:41.000000000 +0200
+++ linux-2.6.14-rc2-ed/include/linux/netdevice.h 2005-09-23 10:52:43.000000000 +0200
@@ -265,6 +265,8 @@
* the interface.
*/
char name[IFNAMSIZ];
+ /* device name hash chain */
+ struct hlist_node name_hlist;
/*
* I/O specific fields
@@ -292,6 +294,21 @@
/* ------- Fields preinitialized in Space.c finish here ------- */
+ /* Net device features */
+ unsigned long features;
+#define NETIF_F_SG 1 /* Scatter/gather IO. */
+#define NETIF_F_IP_CSUM 2 /* Can checksum only TCP/UDP over IPv4. */
+#define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */
+#define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */
+#define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */
+#define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */
+#define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */
+#define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */
+#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */
+#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */
+#define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */
+#define NETIF_F_LLTX 4096 /* LockLess TX */
+
struct net_device *next_sched;
/* Interface index. Unique device identifier */
@@ -316,9 +333,6 @@
* will (read: may be cleaned up at will).
*/
- /* These may be needed for future network-power-down code. */
- unsigned long trans_start; /* Time (in jiffies) of last Tx */
- unsigned long last_rx; /* Time of last Rx */
unsigned short flags; /* interface flags (a la BSD) */
unsigned short gflags;
@@ -328,15 +342,12 @@
unsigned mtu; /* interface MTU value */
unsigned short type; /* interface hardware type */
unsigned short hard_header_len; /* hardware hdr length */
- void *priv; /* pointer to private data */
struct net_device *master; /* Pointer to master device of a group,
* which this device is member of.
*/
/* Interface address info. */
- unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */
- unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address */
unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */
unsigned char addr_len; /* hardware address length */
unsigned short dev_id; /* for shared network cards */
@@ -346,8 +357,6 @@
int promiscuity;
int allmulti;
- int watchdog_timeo;
- struct timer_list watchdog_timer;
/* Protocol specific pointers */
@@ -358,32 +367,62 @@
void *ec_ptr; /* Econet specific data */
void *ax25_ptr; /* AX.25 specific data */
- struct list_head poll_list; /* Link to poll list */
+/*
+ * Cache line mostly used on receive path (including eth_type_trans())
+ */
+ struct list_head poll_list ____cacheline_aligned_in_smp;
+ /* Link to poll list */
+
+ int (*poll) (struct net_device *dev, int *quota);
int quota;
int weight;
+ unsigned long last_rx; /* Time of last Rx */
+ /* Interface address info used in eth_type_trans() */
+ unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address, (before bcast
+ because most packets are unicast) */
+
+ unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */
+/*
+ * Cache line mostly used on queue transmit path (qdisc)
+ */
+ /* device queue lock */
+ spinlock_t queue_lock ____cacheline_aligned_in_smp;
struct Qdisc *qdisc;
struct Qdisc *qdisc_sleeping;
- struct Qdisc *qdisc_ingress;
struct list_head qdisc_list;
unsigned long tx_queue_len; /* Max frames per queue allowed */
/* ingress path synchronizer */
spinlock_t ingress_lock;
+ struct Qdisc *qdisc_ingress;
+
+/*
+ * One part is mostly used on xmit path (device)
+ */
/* hard_start_xmit synchronizer */
- spinlock_t xmit_lock;
+ spinlock_t xmit_lock ____cacheline_aligned_in_smp;
/* cpu id of processor entered to hard_start_xmit or -1,
if nobody entered there.
*/
int xmit_lock_owner;
- /* device queue lock */
- spinlock_t queue_lock;
+ void *priv; /* pointer to private data */
+ int (*hard_start_xmit) (struct sk_buff *skb,
+ struct net_device *dev);
+ /* These may be needed for future network-power-down code. */
+ unsigned long trans_start; /* Time (in jiffies) of last Tx */
+
+ int watchdog_timeo; /* used by dev_watchdog() */
+ struct timer_list watchdog_timer;
+
+/*
+ * refcnt is a very hot point, so align it on SMP
+ */
/* Number of references to this device */
- atomic_t refcnt;
+ atomic_t refcnt ____cacheline_aligned_in_smp;
+
/* delayed register/unregister */
struct list_head todo_list;
- /* device name hash chain */
- struct hlist_node name_hlist;
/* device index hash chain */
struct hlist_node index_hlist;
@@ -396,21 +435,6 @@
NETREG_RELEASED, /* called free_netdev */
} reg_state;
- /* Net device features */
- unsigned long features;
-#define NETIF_F_SG 1 /* Scatter/gather IO. */
-#define NETIF_F_IP_CSUM 2 /* Can checksum only TCP/UDP over IPv4. */
-#define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */
-#define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */
-#define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */
-#define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */
-#define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */
-#define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */
-#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */
-#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */
-#define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */
-#define NETIF_F_LLTX 4096 /* LockLess TX */
-
/* Called after device is detached from network. */
void (*uninit)(struct net_device *dev);
/* Called after last user reference disappears. */
@@ -419,10 +443,7 @@
/* Pointers to interface service routines. */
int (*open)(struct net_device *dev);
int (*stop)(struct net_device *dev);
- int (*hard_start_xmit) (struct sk_buff *skb,
- struct net_device *dev);
#define HAVE_NETDEV_POLL
- int (*poll) (struct net_device *dev, int *quota);
int (*hard_header) (struct sk_buff *skb,
struct net_device *dev,
unsigned short type,
[Index of Archives]
[Kernel Newbies]
[Netfilter]
[Bugtraq]
[Photo]
[Gimp]
[Yosemite News]
[MIPS Linux]
[ARM Linux]
[Linux Security]
[Linux RAID]
[Video 4 Linux]
[Linux for the blind]
|
|