On 1/12/07, Linus Torvalds <[email protected]> wrote:
On Thu, 11 Jan 2007, Roy Huang wrote:
>
> On an embedded system, limiting the page cache can relieve memory
> fragmentation. Here is a patch against 2.6.19 which limits the page
> cache of every opened file as well as the total page cache. When a
> limit is reached, it releases the page cache that overruns the limit.
I do think that something like this is probably a good idea, even on
non-embedded setups. We historically couldn't do this, because mapped
pages were too damn hard to remove, but that's obviously not much of a
problem any more.
However, the page-cache limit should NOT be some compile-time constant. It
should work the same way the "dirty page" limit works, and probably just
default to "feel free to use 90% of memory for page cache".
Linus
The attached patch limits the page cache in a simple way:
1) When memory is requested for the page cache, set a flag to mark this
kind of allocation:
static inline struct page *page_cache_alloc(struct address_space *x)
{
- return __page_cache_alloc(mapping_gfp_mask(x));
+ return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_PAGECACHE);
}
2) Have zone_watermark_ok enforce this limit:
+ if (alloc_flags & ALLOC_PAGECACHE){
+ min = min + VFS_CACHE_LIMIT;
+ }
+
if (free_pages <= min + z->lowmem_reserve[classzone_idx])
return 0;
3) So, when __alloc_pages is called for a page cache allocation, pass
ALLOC_PAGECACHE to get_page_from_freelist to trigger the page cache
limit branch in zone_watermark_ok.
This approach works on my side. I'll soon send a new patch that makes
the limit tunable through the proc fs.
The following is the patch:
=====================================================
Index: mm/page_alloc.c
===================================================================
--- mm/page_alloc.c (revision 2645)
+++ mm/page_alloc.c (working copy)
@@ -892,6 +892,9 @@ failed:
#define ALLOC_HARDER 0x10 /* try to alloc harder */
#define ALLOC_HIGH 0x20 /* __GFP_HIGH set */
#define ALLOC_CPUSET 0x40 /* check for correct cpuset */
+#define ALLOC_PAGECACHE 0x80 /* __GFP_PAGECACHE set */
+
+#define VFS_CACHE_LIMIT 0x400 /* limit VFS cache page */
/*
* Return 1 if free pages are above 'mark'. This takes into account the order
@@ -910,6 +913,10 @@ int zone_watermark_ok(struct zone *z, in
if (alloc_flags & ALLOC_HARDER)
min -= min / 4;
+ if (alloc_flags & ALLOC_PAGECACHE){
+ min = min + VFS_CACHE_LIMIT;
+ }
+
if (free_pages <= min + z->lowmem_reserve[classzone_idx])
return 0;
for (o = 0; o < order; o++) {
@@ -1000,8 +1007,12 @@ restart:
return NULL;
}
- page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
- zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET);
+ if (gfp_mask & __GFP_PAGECACHE)
+ page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+ zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_PAGECACHE);
+ else
+ page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+ zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET);
if (page)
goto got_pg;
@@ -1027,6 +1038,9 @@ restart:
if (wait)
alloc_flags |= ALLOC_CPUSET;
+ if (gfp_mask & __GFP_PAGECACHE)
+ alloc_flags |= ALLOC_PAGECACHE;
+
/*
* Go through the zonelist again. Let __GFP_HIGH and allocations
* coming from realtime tasks go deeper into reserves.
Index: include/linux/gfp.h
===================================================================
--- include/linux/gfp.h (revision 2645)
+++ include/linux/gfp.h (working copy)
@@ -46,6 +46,7 @@ struct vm_area_struct;
#define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
#define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
#define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */
+#define __GFP_PAGECACHE ((__force gfp_t)0x80000u) /* Is page cache allocation ? */
#define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
Index: include/linux/pagemap.h
===================================================================
--- include/linux/pagemap.h (revision 2645)
+++ include/linux/pagemap.h (working copy)
@@ -62,7 +62,7 @@ static inline struct page *__page_cache_
static inline struct page *page_cache_alloc(struct address_space *x)
{
- return __page_cache_alloc(mapping_gfp_mask(x));
+ return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_PAGECACHE);
}
static inline struct page *page_cache_alloc_cold(struct address_space *x)
=====================================================
Any comments and suggestions are welcome.
Thanks,
-Aubrey
Index: mm/page_alloc.c
===================================================================
--- mm/page_alloc.c (revision 2645)
+++ mm/page_alloc.c (working copy)
@@ -892,6 +892,9 @@ failed:
#define ALLOC_HARDER 0x10 /* try to alloc harder */
#define ALLOC_HIGH 0x20 /* __GFP_HIGH set */
#define ALLOC_CPUSET 0x40 /* check for correct cpuset */
+#define ALLOC_PAGECACHE 0x80 /* __GFP_PAGECACHE set */
+
+#define VFS_CACHE_LIMIT 0x400 /* limit VFS cache page */
/*
* Return 1 if free pages are above 'mark'. This takes into account the order
@@ -910,6 +913,10 @@ int zone_watermark_ok(struct zone *z, in
if (alloc_flags & ALLOC_HARDER)
min -= min / 4;
+ if (alloc_flags & ALLOC_PAGECACHE){
+ min = min + VFS_CACHE_LIMIT;
+ }
+
if (free_pages <= min + z->lowmem_reserve[classzone_idx])
return 0;
for (o = 0; o < order; o++) {
@@ -1000,8 +1007,12 @@ restart:
return NULL;
}
- page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
- zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET);
+ if (gfp_mask & __GFP_PAGECACHE)
+ page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+ zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_PAGECACHE);
+ else
+ page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+ zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET);
if (page)
goto got_pg;
@@ -1027,6 +1038,9 @@ restart:
if (wait)
alloc_flags |= ALLOC_CPUSET;
+ if (gfp_mask & __GFP_PAGECACHE)
+ alloc_flags |= ALLOC_PAGECACHE;
+
/*
* Go through the zonelist again. Let __GFP_HIGH and allocations
* coming from realtime tasks go deeper into reserves.
Index: include/linux/gfp.h
===================================================================
--- include/linux/gfp.h (revision 2645)
+++ include/linux/gfp.h (working copy)
@@ -46,6 +46,7 @@ struct vm_area_struct;
#define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
#define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
#define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */
+#define __GFP_PAGECACHE ((__force gfp_t)0x80000u) /* Is page cache allocation ? */
#define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
Index: include/linux/pagemap.h
===================================================================
--- include/linux/pagemap.h (revision 2645)
+++ include/linux/pagemap.h (working copy)
@@ -62,7 +62,7 @@ static inline struct page *__page_cache_
static inline struct page *page_cache_alloc(struct address_space *x)
{
- return __page_cache_alloc(mapping_gfp_mask(x));
+ return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_PAGECACHE);
}
static inline struct page *page_cache_alloc_cold(struct address_space *x)
[Index of Archives]
[Kernel Newbies]
[Netfilter]
[Bugtraq]
[Photo]
[Stuff]
[Gimp]
[Yosemite News]
[MIPS Linux]
[ARM Linux]
[Linux Security]
[Linux RAID]
[Video 4 Linux]
[Linux for the blind]
[Linux Resources]