Re: X-freeze after clflush changes [Was: 2.6.23-rc6-mm1]

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 09/19/2007 01:53 PM, Jiri Slaby wrote:
> On 09/19/2007 01:43 PM, Jiri Slaby wrote:
>> On 09/18/2007 10:18 AM, Andrew Morton wrote:
>>> - The Vaio hangs when quitting X due to x86_64-mm-cpa-clflush.patch, but
>>>   I didn't drop that patch because the iommu patch series depends on it.
>> No matter whether slub/slab is selected someone gets a page and moves/adds its
> 
> Oh, only adds, if it moves, it won't break the list. Going to check for
> POISON1/2 in __list_add, will get results (if any) in few moments...

Huh, it took longer than I expected :). I changed this:
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
index 836c218..cd8499c 100644
--- a/arch/x86_64/mm/pageattr.c
+++ b/arch/x86_64/mm/pageattr.c
@@ -112,7 +112,14 @@ static inline void save_page(struct page *fpage, int data)
                        return;
                SetPageFlush(fpage);
        }
+       printk("ADD (%s): E=%p, H=%p, H->N=%p, N->P=%p, N->N=%p; PREV0=%p, "
+                       "NEXT0=%p, ",
+                       current->comm, &fpage->lru,
+                       &deferred_pages, deferred_pages.next,
+                       deferred_pages.next->prev, deferred_pages.next->next,
+                       fpage->lru.prev, fpage->lru.next);
        list_add(&fpage->lru, &deferred_pages);
+       printk("PREV1=%p, NEXT1=%p\n", fpage->lru.prev, fpage->lru.next);
 }

 /*
@@ -274,6 +281,7 @@ void global_flush_tlb(void)
        down_read(&init_mm.mmap_sem);
        arg.full_flush = full_flush;
        full_flush = 0;
+       printk("FLUSH\n");
        list_replace_init(&deferred_pages, &arg.l);
        up_read(&init_mm.mmap_sem);

diff --git a/include/linux/list.h b/include/linux/list.h
index f29fc9c..1add963 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -265,6 +265,8 @@ static inline void list_del_init(struct list_head *entry)
 static inline void list_move(struct list_head *list, struct list_head *head)
 {
        __list_del(list->prev, list->next);
+       list->next = LIST_POISON1;
+       list->prev = LIST_POISON2;
        list_add(list, head);
 }

@@ -277,6 +279,8 @@ static inline void list_move_tail(struct list_head *list,
                                  struct list_head *head)
 {
        __list_del(list->prev, list->next);
+       list->next = LIST_POISON1;
+       list->prev = LIST_POISON2;
        list_add_tail(list, head);
 }

diff --git a/lib/list_debug.c b/lib/list_debug.c
index 4350ba9..57573d5 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -15,15 +15,34 @@
  * This is only for internal list manipulation where we know
  * the prev/next entries already!
  */
-
+#include <linux/sched.h>
 void __list_add(struct list_head *new,
                              struct list_head *prev,
                              struct list_head *next)
 {
+       static unsigned int a, b;
+       unsigned long off;
+
+       if (unlikely(!a && current && current->comm[0] == 'X'))
+               a++;
+
+       if (unlikely(a && !b && (void *)new >= (void *)mem_map &&
+               (void *)new < (void *)(mem_map + 1048576) &&
+               (new->prev != LIST_POISON2 || new->next != LIST_POISON1) &&
+               (new->prev != NULL || new->next != NULL) &&
+               (new->prev != new || new->next != new))) {
+               off = ((void *)new - (void *)mem_map) % sizeof(struct page);
+               if (off == offsetof(struct page, lru)) {
+                       printk(KERN_DEBUG "POISONS (%p): "
+                                       "%p, %p\n", new, new->prev, new->next);
+                       dump_stack();
+               }
+       }
        if (unlikely(next->prev != prev)) {
                printk(KERN_ERR "list_add corruption. next->prev should be "
                        "prev (%p), but was %p. (next=%p).\n",
                        prev, next->prev, next);
+               b++;
                BUG();
        }
        if (unlikely(prev->next != next)) {

---------8<---------8<---------8<---------8<---------8<---------8<----
and got this:
ADD (X): E=ffff81000115b9e0, H=ffffffff80673aa0, H->N=ffff8100011573e0,
N->P=ffffffff80673aa0, N->N=ffff81000115ba18; PREV0=0000000000200200,
NEXT0=0000000000100100, PREV1=ffffffff80673aa0, NEXT1=ffff8100011573e0
/----------------------------------------------------------\
the line above (^^^^) is the output from the unmap path, see (1) below;
the lines below (vvvv) are the output from the free_page path, see (2) below
\----------------------------------------------------------/
POISONS (ffff81000115b9e0): ffffffff80673aa0, ffff8100011573e0

Call Trace:
 [<ffffffff80328c06>] __list_add+0xf6/0x190
 [<ffffffff80328cac>] list_add+0xc/0x10
 [<ffffffff8026e46d>] free_hot_cold_page+0xfd/0x170
 [<ffffffff8026e52b>] free_hot_page+0xb/0x10
 [<ffffffff8026e555>] __free_pages+0x25/0x30
 [<ffffffff8026e58b>] free_pages+0x2b/0x40
 [<ffffffff8037cf16>] agp_generic_destroy_page+0x56/0x70
 [<ffffffff8037d9d5>] agp_free_memory+0x65/0xd0
 [<ffffffff8037c0e9>] agp_free_memory_wrap+0x39/0x60
 [<ffffffff8037c79b>] agp_release+0xdb/0x1c0
 [<ffffffff80291440>] __fput+0xc0/0x1b0
 [<ffffffff802915b6>] fput+0x16/0x20
 [<ffffffff8028e516>] filp_close+0x56/0x90
 [<ffffffff8023a552>] put_files_struct+0xc2/0xd0
 [<ffffffff8023ba55>] do_exit+0x1e5/0x990
 [<ffffffff8023c237>] do_group_exit+0x37/0x90
 [<ffffffff8023c2a2>] sys_exit_group+0x12/0x20
 [<ffffffff8020bd0e>] system_call+0x7e/0x83

ADD (X): E=ffff81000115b9a8, H=ffffffff80673aa0, H->N=ffff81000115b9e0,
N->P=ffffffff8067ef50, N->N=ffff81000115ba18; PREV0=0000000000200200,
NEXT0=0000000000100100, <3>list_add corruption. next->prev should be prev
(ffffffff80673aa0), but was ffffffff8067ef50. (next=ffff81000115b9e0).
------------[ cut here ]------------
kernel BUG at /home/l/latest/xxx/lib/list_debug.c:46!
invalid opcode: 0000 [1] SMP
last sysfs file: /devices/pci0000:00/0000:00:02.0/enable
CPU 1
Modules linked in: ipv6 floppy sr_mod ehci_hcd cdrom rtc_cmos usbhid rtc_core
rtc_lib
Pid: 1603, comm: X Not tainted 2.6.23-rc6-mm1_64 #51
RIP: 0010:[<ffffffff80328c73>]  [<ffffffff80328c73>] __list_add+0x163/0x190
RSP: 0018:ffff810004e31c38  EFLAGS: 00010202
RAX: 0000000000000079 RBX: ffff81000115b9a8 RCX: ffffffff80673f68
RDX: ffffffff80673f68 RSI: 0000000000000086 RDI: ffffffff80673f60
RBP: ffff810004e31c58 R08: 0000000000000000 R09: 0000000000000001
R10: 0000000000000001 R11: 0000000000000000 R12: ffffffff80673aa0
R13: ffff81000115b9e0 R14: 0000000000006350 R15: ffff810006211a80
FS:  0000000000000000(0000) GS:ffff810003016500(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000000000514cc0 CR3: 0000000000201000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process X (pid: 1603, threadinfo ffff810004e30000, task ffff810003b26bd0)
last branch before last exception/interrupt
 from  [<ffffffff80238a83>] printk+0xa3/0xb0
 to  [<ffffffff80328c6c>] __list_add+0x15c/0x190
Stack:  ffffffff8067ef50 ffff81000115b9a8 ffff81000115b980 ffff8100011573b8
 ffff810004e31c68 ffffffff80328cac ffff810004e31d08 ffffffff80225b28
 ffff81000115ba18 0000000000200200 0000000000100100 ffffffff8067ee80
Call Trace:
 [<ffffffff80328cac>] list_add+0xc/0x10
 [<ffffffff80225b28>] __change_page_attr+0x478/0x4a0
 [<ffffffff80225bf6>] change_page_attr_addr+0xa6/0x140
 [<ffffffff80225cc3>] change_page_attr+0x33/0x40
 [<ffffffff8037cf04>] agp_generic_destroy_page+0x44/0x70
 [<ffffffff8037d9d5>] agp_free_memory+0x65/0xd0
 [<ffffffff8037c0e9>] agp_free_memory_wrap+0x39/0x60
 [<ffffffff8037c79b>] agp_release+0xdb/0x1c0
 [<ffffffff80291440>] __fput+0xc0/0x1b0
 [<ffffffff802915b6>] fput+0x16/0x20
 [<ffffffff8028e516>] filp_close+0x56/0x90
 [<ffffffff8023a552>] put_files_struct+0xc2/0xd0
 [<ffffffff8023ba55>] do_exit+0x1e5/0x990
 [<ffffffff8023c237>] do_group_exit+0x37/0x90
 [<ffffffff8023c2a2>] sys_exit_group+0x12/0x20
 [<ffffffff8020bd0e>] system_call+0x7e/0x83


Code: 0f 0b eb fe 80 ba e2 02 00 00 58 b8 01 00 00 00 0f 45 05 c6
RIP  [<ffffffff80328c73>] __list_add+0x163/0x190
 RSP <ffff810004e31c38>
Fixing recursive fault but reboot is needed!


---------8<---------8<---------8<---------8<---------8<---------8<----
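That means the same page->lru entry gets added to two different lists in
agp_generic_destroy_page(): first to deferred_pages at (1) and then, while it
is still linked there, to pcp->list at (2):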
void agp_generic_destroy_page(void *addr)
{
        struct page *page;

        if (addr == NULL)
                return;

        page = virt_to_page(addr);
(1)     unmap_page_from_agp(page);
        put_page(page);
(2)     free_page((unsigned long)addr);
        atomic_dec(&agp_bridge->current_memory_agp);
}

(1) unmap_page_from_agp -> change_page_attr -> change_page_attr_addr ->
__change_page_attr -> save_page -> list_add(&fpage->lru, &deferred_pages);
(2) free_page -> free_pages -> __free_pages -> free_hot_page ->
free_hot_cold_page -> list_add(&page->lru, &pcp->list);

Any ideas how to fix this?

regards,
-- 
Jiri Slaby ([email protected])
Faculty of Informatics, Masaryk University
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux