I'm sending this on mostly because it was a bit of a pain to track down, and hopefully it will save time if anyone else hits this while playing with the -rt kernel. It is NOT the right way to fix things, so please don't even think of applying this patch (unless you need it, in your own local tree :-). One of these days when we have time to breath we'll look into fixing this the right way, if someone doesn't beat us to it first. :-) - Ted
Attachment:
paper-bag.gif
Description: GIF image
Fix for tg3 networking lockup (LTC BZ# 25487) The tg3 networking card is a finicky beast, and needs to be tickled every tenth of a second or so or it will go deaf and dumb until it is reset via "ifdown eth0; ifup eth0". The problem comes if a workload consumes enough CPU power so that tg3_timer() doesn't run frequently enough. Unfortunately, the softirq_timer doesn't run at a sufficiently high priority to guarantee this, and boosting softirq_timer to prio 90 or so causes the customer benchmark to fail due to a decrease in determinism due to running all timers at high priority. The right fix is to change the timer code to dynamically change priorities, but this is a non-trivial fix. This is a quick hack driven by code freeze deadlines. :-{ What we do is hack in a special case hook into kernel/clockevents.c to enforce tg3_timer getting called approximately every 0.128 seconds (assuming HZ=250). This appears to be sufficient to stop the networking hardware from going insane. Brown-Paper-Bag-By: "Theodore Ts'o" <[email protected]> Index: linux-rtj12.7/drivers/net/tg3.c =================================================================== --- linux-rtj12.7.orig/drivers/net/tg3.c 2006-08-02 10:11:09.000000000 -0400 +++ linux-rtj12.7/drivers/net/tg3.c 2006-08-03 02:17:41.000000000 -0400 @@ -342,6 +342,36 @@ { "interrupt test (offline)" }, }; +/* + * TG3 timer section + * + * HACK HACK HACK --- we need to make sure the timer runs at a high + * priority, but unfortunately there's no good way to do this given + * the current timer infrastructure. So we kludge it, by hacking the + * clockevents code. I wish there were a better a way, but changing + * all of the timer infrastructure to work like hrtimers is far too + * invasive, and the hrtimer infrastructure assumes that priority + * boosting only happens to in response to a process running at a + * particular priority depending on an hrtimer, so in order to use + * hrtimers I would have needed to create a kernel thread and then + * export all of the hrtimer functions. This is ugly as sin, but it + * involves the smallest possible changes for ease of auditing. --- + * [tytso:20060803.0217EDT] + */ + +extern void (*tg3_hack_hook)(void); /* defined in kernel/clockevents.c */ +static void tg3_timer(unsigned long __opaque); /* defined below */ +static LIST_HEAD(tg3_timer_list); /* list of all active tg3 interfaces */ + +static void tg3_run_timers(void) +{ + struct tg3 *tp; + + list_for_each_entry(tp, &tg3_timer_list, timer_hack_list) { + tg3_timer((unsigned long) tp); + } +} + static void tg3_write32(struct tg3 *tp, u32 off, u32 val) { writel(val, tp->regs + off); @@ -3480,7 +3510,6 @@ static void tg3_reset_task(void *_data) { struct tg3 *tp = _data; - unsigned int restart_timer; tg3_full_lock(tp, 0); tp->tg3_flags |= TG3_FLAG_IN_RESET_TASK; @@ -3497,7 +3526,6 @@ tg3_full_lock(tp, 1); - restart_timer = tp->tg3_flags2 & TG3_FLG2_RESTART_TIMER; tp->tg3_flags2 &= ~TG3_FLG2_RESTART_TIMER; tg3_halt(tp, RESET_KIND_SHUTDOWN, 0); @@ -3505,9 +3533,6 @@ tg3_netif_start(tp); - if (restart_timer) - mod_timer(&tp->timer, jiffies + 1); - tp->tg3_flags &= ~TG3_FLAG_IN_RESET_TASK; tg3_full_unlock(tp); @@ -6328,7 +6353,6 @@ spin_unlock(&tp->lock); tp->timer.expires = jiffies + tp->timer_offset; - add_timer(&tp->timer); } static int tg3_test_interrupt(struct tg3 *tp) @@ -6570,7 +6594,7 @@ tg3_full_lock(tp, 0); - add_timer(&tp->timer); + list_add(&tp->timer_hack_list, &tg3_timer_list); tp->tg3_flags |= TG3_FLAG_INIT_COMPLETE; tg3_enable_ints(tp); @@ -6825,7 +6849,7 @@ netif_stop_queue(dev); - del_timer_sync(&tp->timer); + list_del(&tp->timer_hack_list); tg3_full_lock(tp, 1); #if 0 @@ -10751,6 +10775,7 @@ spin_lock_init(&tp->tx_lock); spin_lock_init(&tp->indirect_lock); INIT_WORK(&tp->reset_task, tg3_reset_task, tp); + INIT_LIST_HEAD(&tp->timer_hack_list); tp->regs = ioremap_nocache(tg3reg_base, tg3reg_len); if (tp->regs == 0UL) { @@ -10945,6 +10970,7 @@ (pdev->dma_mask == DMA_32BIT_MASK) ? 32 : (((u64) pdev->dma_mask == DMA_40BIT_MASK) ? 40 : 64)); + tg3_hack_hook = tg3_run_timers; return 0; err_out_iounmap: @@ -10997,7 +11023,7 @@ flush_scheduled_work(); tg3_netif_stop(tp); - del_timer_sync(&tp->timer); + list_del(&tp->timer_hack_list); tg3_full_lock(tp, 1); tg3_disable_ints(tp); @@ -11018,7 +11044,7 @@ tg3_init_hw(tp); tp->timer.expires = jiffies + tp->timer_offset; - add_timer(&tp->timer); + list_add(&tp->timer_hack_list, &tg3_timer_list); netif_device_attach(dev); tg3_netif_start(tp); @@ -11052,7 +11078,7 @@ tg3_init_hw(tp); tp->timer.expires = jiffies + tp->timer_offset; - add_timer(&tp->timer); + list_add(&tp->timer_hack_list, &tg3_timer_list); tg3_netif_start(tp); @@ -11077,6 +11103,7 @@ static void __exit tg3_cleanup(void) { + tg3_hack_hook = 0; pci_unregister_driver(&tg3_driver); } Index: linux-rtj12.7/drivers/net/tg3.h =================================================================== --- linux-rtj12.7.orig/drivers/net/tg3.h 2006-08-02 10:11:09.000000000 -0400 +++ linux-rtj12.7/drivers/net/tg3.h 2006-08-02 11:50:20.000000000 -0400 @@ -2208,6 +2208,7 @@ u32 timer_offset; u16 asf_counter; u16 asf_multiplier; + struct list_head timer_hack_list; struct tg3_link_config link_config; struct tg3_bufmgr_config bufmgr_config; Index: linux-rtj12.7/kernel/time/clockevents.c =================================================================== --- linux-rtj12.7.orig/kernel/time/clockevents.c 2006-08-01 19:02:55.000000000 -0400 +++ linux-rtj12.7/kernel/time/clockevents.c 2006-08-03 02:07:23.000000000 -0400 @@ -91,6 +91,24 @@ return IRQ_HANDLED; } +/* I am so ashamed... */ +void (*tg3_hack_hook)(void); +EXPORT_SYMBOL_GPL(tg3_hack_hook); + +#if (HZ == 1000) +#define TG3_HACK_MASK (127) +#elif (HZ == 250) +#define TG3_HACK_MASK (31) +#else /* HZ == 100 */ +#define TG3_HACK_MASK (7) +#endif + +static inline void check_tg3_hack(void) +{ + if (tg3_hack_hook && ((jiffies & TG3_HACK_MASK) == 0)) + (*tg3_hack_hook)(); +} + /* * Handle tick */ @@ -99,6 +117,8 @@ write_seqlock(&xtime_lock); do_timer(regs); write_sequnlock(&xtime_lock); + + check_tg3_hack(); timeofday_overflow_protection(); } @@ -111,6 +131,7 @@ do_timer(regs); write_sequnlock(&xtime_lock); + check_tg3_hack(); update_process_times(user_mode(regs)); } @@ -123,6 +144,7 @@ do_timer(regs); write_sequnlock(&xtime_lock); + check_tg3_hack(); update_process_times(user_mode(regs)); profile_tick(CPU_PROFILING, regs); }
- Follow-Ups:
- Re: [PATCH -rt DO NOT APPLY] Fix for tg3 networking lockup
- From: Herbert Xu <[email protected]>
- Re: [PATCH -rt DO NOT APPLY] Fix for tg3 networking lockup
- Prev by Date: Re: Can someone explain under what condition inode cache pages can be swapped out?
- Next by Date: Re: get_device in device_create_file
- Previous by thread: Next 2.6.17-stable review cycle will be starting in about 24 hours
- Next by thread: Re: [PATCH -rt DO NOT APPLY] Fix for tg3 networking lockup
- Index(es):