Hi all,

We know the buddy algorithm performs very well and deals with external fragmentation in Linux memory management. But IMHO it is not the best algorithm for embedded Linux systems, especially on NOMMU arches. Here I wrote a memory allocation algorithm, and I really hope it can be a starting point for a new allocator for embedded Linux.

My thinking is based on the current SLOB algorithm in the Linux kernel; the new allocator is a replacement for the buddy system and works well with both SLAB and SLOB. For now it is a patch against 2.6.16 and most of the modification is in mm/page_alloc.c, but it is easily ported to the newest kernel and I can add a new kernel option to enable it. I will try my best to change as little of the existing code in mm/page_alloc.c as possible.

The basic implementation is as follows (a simplified user-space sketch of these steps is included after the patch):

1) Maintain a (struct list_head) list in the zone; every piece of free pages is added to the list in order of its location in memory.

2) When freeing pages, __free_one_page() tries to merge the freed block with its neighbours on the list as long as they are physically contiguous, regardless of whether their sizes differ. If it cannot be merged, it is placed on the list alone.

3) When allocating pages, __rmqueue() searches the zone list and returns either an exact-fit page block or the first available block that is large enough.

4) buddyinfo is kept to show the fragmentation of the system.

The patch is below and attached as well. I really appreciate any comments, suggestions, bug fixes, and improvements.

Thanks,
-Aubrey
-------------------------------------------------------------------------------------------------------
--- /home/aubrey/cvs/kernel/branch/uClinux-dist/linux-2.6.x/mm/page_alloc.c 2006-12-19 16:18:46.000000000 +0800
+++ mm/page_alloc.c 2006-12-30 23:03:56.000000000 +0800
@@ -323,37 +323,57 @@ static inline void __free_one_page(struc
 {
         unsigned long page_idx;
         int order_size = 1 << order;
+        struct list_head *p_cake_list;
+        struct page *cur_page = NULL, *prev_page;
+        unsigned long new_size;

         if (unlikely(PageCompound(page)))
                 destroy_compound_page(page, order);

         page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);

-        BUG_ON(page_idx & (order_size - 1));
+//      BUG_ON(page_idx & (order_size - 1));
         BUG_ON(bad_range(zone, page));

         zone->free_pages += order_size;
-        while (order < MAX_ORDER-1) {
-                unsigned long combined_idx;
-                struct free_area *area;
-                struct page *buddy;
-
-                buddy = __page_find_buddy(page, page_idx, order);
-                if (!page_is_buddy(page, buddy, order))
-                        break;          /* Move the buddy up one level. */
-
-                list_del(&buddy->lru);
-                area = zone->free_area + order;
-                area->nr_free--;
-                rmv_page_order(buddy);
-                combined_idx = __find_combined_index(page_idx, order);
-                page = page + (combined_idx - page_idx);
-                page_idx = combined_idx;
-                order++;
-        }
-        set_page_order(page, order);
-        list_add(&page->lru, &zone->free_area[order].free_list);
-        zone->free_area[order].nr_free++;
+
+        list_for_each(p_cake_list, &zone->cake_list) {
+                cur_page = list_entry(p_cake_list, struct page, cake_piece);
+                if (cur_page > page)
+                        break;
+        }
+        prev_page = list_entry(p_cake_list->prev, struct page, cake_piece);
+
+        /*
+         * case 1: best case, the inserted page is exactly conjoint
+         * with prev_page and cur_page
+         */
+        if ((prev_page + prev_page->private == page) &&
+                        (page + order_size == cur_page)) {
+                new_size = prev_page->private + order_size + cur_page->private;
+                set_page_order(prev_page, new_size);
+                list_del(p_cake_list);
+        /*
+         * case 2: the inserted page will be conjoint with prev_page
+         */
+        } else if (prev_page + prev_page->private == page) {
+                new_size = prev_page->private + order_size;
+                set_page_order(prev_page, new_size);
+        /*
+         * case 3: the inserted page will be conjoint with cur_page
+         */
+        } else if (page + order_size == cur_page) {
+                new_size = order_size + cur_page->private;
+                set_page_order(page, new_size);
+                list_add_tail(&page->cake_piece, p_cake_list);
+                list_del(p_cake_list);
+        /*
+         * case 4: worst case, the inserted page is alone.
+         */
+        } else {
+                set_page_order(page, order_size);
+                list_add_tail(&page->cake_piece, p_cake_list);
+        }
 }

 static inline int free_pages_check(struct page *page)
@@ -555,25 +575,62 @@ static int prep_new_page(struct page *pa
  */
 static struct page *__rmqueue(struct zone *zone, unsigned int order)
 {
-        struct free_area * area;
-        unsigned int current_order;
-        struct page *page;
+        struct page *page, *new_page = NULL;
+        struct list_head *p_cake_list, *first = NULL;
+        unsigned int order_size = 1 << order;
+        int i, flag = 1;
+        /*
+         * Find the exact fit block or the first available block
+         */
+        list_for_each(p_cake_list, &zone->cake_list) {
+                page = list_entry(p_cake_list, struct page, cake_piece);
+                if (page->private == order_size) {
+                        list_del(p_cake_list);
+                        goto got_page;
+                }
+                if (flag && page->private > order_size) {
+                        new_page = page;
+                        first = p_cake_list;
+                        flag = 0;
+                }
+        }

-        for (current_order = order; current_order < MAX_ORDER; ++current_order) {
-                area = zone->free_area + current_order;
-                if (list_empty(&area->free_list))
-                        continue;
+        if (!new_page) {/* no block */
+                printk("CAKE DEBUG:NO block available\n");
+                return NULL;
+        } else {
+                page = new_page;
+                p_cake_list = first;
+        }
+
+        /*
+         * blah blah blah, but page should be 2-page aligned,
+         * because task stack should be on the 8K boundary
+         */
+        if (order_size > 1) {
+                new_page = page + order_size;
+                set_page_order(new_page, page->private - order_size);
+                list_add(&new_page->cake_piece, p_cake_list);
+                list_del(p_cake_list);
+        } else if (page->private == 2) {
+                set_page_order(page, 1);
+                page ++;
+        } else {
+                new_page = page + 2;
+                set_page_order(new_page, page->private - 2);
+                list_add(&new_page->cake_piece, p_cake_list);
+                set_page_order(page, 1);
+                page ++;
+        }

-                page = list_entry(area->free_list.next, struct page, lru);
-                list_del(&page->lru);
-                rmv_page_order(page);
-                area->nr_free--;
-                zone->free_pages -= 1UL << order;
-                expand(zone, page, order, current_order, area);
-                return page;
+got_page:
+        for(i=0;i< order_size;i++){
+                rmv_page_order(page+i);
+                set_page_count(page, 0);
         }
-        return NULL;
+
+        zone->free_pages -= 1UL << order;
+        return page;
 }

 /*
@@ -1777,6 +1834,7 @@ void __meminit memmap_init_zone(unsigned
                 reset_page_mapcount(page);
                 SetPageReserved(page);
                 INIT_LIST_HEAD(&page->lru);
+                INIT_LIST_HEAD(&page->cake_piece);
 #ifdef WANT_PAGE_VIRTUAL
                 /* The shift won't overflow because ZONE_NORMAL is below 4G. */
                 if (!is_highmem_idx(zone))
@@ -1793,6 +1851,7 @@ void zone_init_free_lists(struct pglist_
                 INIT_LIST_HEAD(&zone->free_area[order].free_list);
                 zone->free_area[order].nr_free = 0;
         }
+        INIT_LIST_HEAD(&zone->cake_list);
 }

 #define ZONETABLE_INDEX(x, zone_nr)  ((x << ZONES_SHIFT) | zone_nr)
@@ -2190,18 +2249,25 @@ static int frag_show(struct seq_file *m,
         struct zone *zone;
         struct zone *node_zones = pgdat->node_zones;
         unsigned long flags;
-        int order;
+        int num = 1;
+        struct list_head *p_cake_list;
+        struct page *page;

         for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
                 if (!populated_zone(zone))
                         continue;

                 spin_lock_irqsave(&zone->lock, flags);
-                seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
-                for (order = 0; order < MAX_ORDER; ++order)
-                        seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
+                seq_printf(m, "--------Node %d, zone %s--------\n",
+                                pgdat->node_id, zone->name);
+                __list_for_each(p_cake_list, &zone->cake_list){
+                        page = list_entry(p_cake_list, struct page, cake_piece);
+                        seq_printf(m, "No.%-4d Page addr: 0x%-8x num: %-5d buddy addr:0x%-8x\n",
+                                num++, (int)page_to_virt(page), (int)page->private,
+                                (int)page_to_virt(page + page->private));
+                }
+                seq_printf(m, "-------------End----------------\n");
                 spin_unlock_irqrestore(&zone->lock, flags);
-                seq_putc(m, '\n');
         }
         return 0;
 }
--- /home/aubrey/cvs/kernel/branch/uClinux-dist/linux-2.6.x/include/linux/mm.h 2006-06-06 11:32:09.000000000 +0800
+++ include/linux/mm.h 2006-12-29 01:59:16.000000000 +0800
@@ -250,6 +250,7 @@ struct page {
         struct list_head lru;           /* Pageout list, eg. active_list
                                          * protected by zone->lru_lock !
                                          */
+        struct list_head cake_piece;
         /*
          * On machines where all RAM is mapped into kernel address space,
          * we can simply calculate the virtual address. On machines with
--- /home/aubrey/cvs/kernel/branch/uClinux-dist/linux-2.6.x/include/linux/mmzone.h 2006-12-30 23:02:02.000000000 +0800
+++ include/linux/mmzone.h 2006-12-30 23:02:54.000000000 +0800
@@ -145,6 +145,7 @@ struct zone {
         seqlock_t               span_seqlock;
 #endif
         struct free_area        free_area[MAX_ORDER];
+        struct list_head        cake_list;

         ZONE_PADDING(_pad1_)
---------------------------------------------------------------------------------------------------------------------------------
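To make the four steps above easier to follow outside the kernel, here is a minimal, self-contained user-space sketch of the same idea: an address-ordered free list that merges a freed block with its contiguous neighbours, and allocates by exact fit, falling back to splitting the first block that is large enough. It is only an illustration of the list handling described above, not the patch itself; the names (struct free_block, cake_free(), cake_alloc()) are invented for the example, and it deliberately ignores the kernel details (struct page, zone locking, and the 8K task-stack alignment special case in __rmqueue()).

#include <stdio.h>
#include <stdlib.h>

/* One free region: pages [start, start + npages), kept in an address-ordered list. */
struct free_block {
        unsigned long start;            /* first page frame number of the block */
        unsigned long npages;           /* number of contiguous free pages */
        struct free_block *prev, *next; /* neighbours in the address-ordered list */
};

static struct free_block *cake_head;    /* address-ordered list of free blocks */

/* Step 2: insert a freed range in address order and merge with contiguous neighbours. */
static void cake_free(unsigned long start, unsigned long npages)
{
        struct free_block *cur = cake_head, *prev = NULL, *blk;

        while (cur && cur->start < start) {
                prev = cur;
                cur = cur->next;
        }

        if (prev && prev->start + prev->npages == start) {
                /* Grows the previous block; maybe the next one too if the hole closes. */
                prev->npages += npages;
                if (cur && prev->start + prev->npages == cur->start) {
                        prev->npages += cur->npages;
                        prev->next = cur->next;
                        if (cur->next)
                                cur->next->prev = prev;
                        free(cur);
                }
        } else if (cur && start + npages == cur->start) {
                /* Contiguous with the following block only. */
                cur->start = start;
                cur->npages += npages;
        } else {
                /* Stand-alone block: link it between prev and cur. */
                blk = malloc(sizeof(*blk));
                blk->start = start;
                blk->npages = npages;
                blk->prev = prev;
                blk->next = cur;
                if (prev)
                        prev->next = blk;
                else
                        cake_head = blk;
                if (cur)
                        cur->prev = blk;
        }
}

/* Step 3: exact fit if possible, otherwise split the first block that is big enough. */
static long cake_alloc(unsigned long npages)
{
        struct free_block *cur, *first_fit = NULL;
        unsigned long start;

        for (cur = cake_head; cur; cur = cur->next) {
                if (cur->npages == npages) {            /* exact fit: unlink and return */
                        start = cur->start;
                        if (cur->prev)
                                cur->prev->next = cur->next;
                        else
                                cake_head = cur->next;
                        if (cur->next)
                                cur->next->prev = cur->prev;
                        free(cur);
                        return (long)start;
                }
                if (!first_fit && cur->npages > npages) /* remember the first larger block */
                        first_fit = cur;
        }
        if (!first_fit)
                return -1;                              /* no block large enough */

        start = first_fit->start;                       /* carve the request off the front */
        first_fit->start += npages;
        first_fit->npages -= npages;
        return (long)start;
}

int main(void)
{
        cake_free(0, 32);                       /* seed the model with 32 free pages */
        long a = cake_alloc(4), b = cake_alloc(8);
        cake_free((unsigned long)a, 4);         /* [0,4) comes back ahead of the big block */
        cake_free((unsigned long)b, 8);         /* [4,12) closes the hole: one block again */
        printf("head block: start=%lu npages=%lu\n",
               cake_head->start, cake_head->npages); /* expect start=0 npages=32 */
        return 0;
}

Built with gcc, this prints "head block: start=0 npages=32": the two freed ranges coalesce with the remaining free space back into a single block, which is the behaviour cases 1-3 in __free_one_page() above are meant to provide.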
Attachment: cake.diff