On Mon, Dec 04, 2006 at 03:55:52PM +0100, Nick Piggin wrote:
> Hi Mel,
>
> I think you're right about the leakage, thanks for catching it.
Yeah, it caused oom storm here.
The pagevec simplification looks nice.
I've ported it to -mm, hope it is useful.
I'm prepared to test your revised patch :)
Fengguang Wu
---
--- linux-2.6.19-rc6-mm2.orig/mm/filemap.c
+++ linux-2.6.19-rc6-mm2/mm/filemap.c
@@ -708,26 +708,18 @@ EXPORT_SYMBOL(find_lock_page);
struct page *find_or_create_page(struct address_space *mapping,
unsigned long index, gfp_t gfp_mask)
{
- struct page *page, *cached_page = NULL;
+ struct page *page;
int err;
repeat:
page = find_lock_page(mapping, index);
if (!page) {
- if (!cached_page) {
- cached_page = alloc_page(gfp_mask);
- if (!cached_page)
- return NULL;
- }
- err = add_to_page_cache_lru(cached_page, mapping,
- index, gfp_mask);
- if (!err) {
- page = cached_page;
- cached_page = NULL;
- } else if (err == -EEXIST)
+ page = alloc_page(gfp_mask);
+ if (!page)
+ return NULL;
+ err = add_to_page_cache_lru(page, mapping, index, gfp_mask);
+ if (err == -EEXIST)
goto repeat;
}
- if (cached_page)
- page_cache_release(cached_page);
return page;
}
EXPORT_SYMBOL(find_or_create_page);
@@ -922,11 +914,9 @@ void do_generic_mapping_read(struct addr
unsigned long next_index;
unsigned long prev_index;
loff_t isize;
- struct page *cached_page;
int error;
struct file_ra_state ra = *_ra;
- cached_page = NULL;
index = *ppos >> PAGE_CACHE_SHIFT;
next_index = index;
prev_index = ra.prev_page;
@@ -1107,14 +1097,12 @@ no_cached_page:
* Ok, it wasn't cached, so we need to create a new
* page..
*/
- if (!cached_page) {
- cached_page = page_cache_alloc_cold(mapping);
- if (!cached_page) {
- desc->error = -ENOMEM;
- goto out;
- }
+ page = page_cache_alloc_cold(mapping);
+ if (!page) {
+ desc->error = -ENOMEM;
+ goto out;
}
- error = add_to_page_cache_lru(cached_page, mapping,
+ error = add_to_page_cache_lru(page, mapping,
index, GFP_KERNEL);
if (error) {
if (error == -EEXIST)
@@ -1122,8 +1110,6 @@ no_cached_page:
desc->error = error;
goto out;
}
- page = cached_page;
- cached_page = NULL;
goto readpage;
}
@@ -1133,8 +1119,6 @@ out:
_ra->prev_page = prev_index;
*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
- if (cached_page)
- page_cache_release(cached_page);
if (filp)
file_accessed(filp);
}
@@ -1826,35 +1810,28 @@ static inline struct page *__read_cache_
int (*filler)(void *,struct page*),
void *data)
{
- struct page *page, *cached_page = NULL;
+ struct page *page;
int err;
repeat:
page = find_get_page(mapping, index);
if (!page) {
- if (!cached_page) {
- cached_page = page_cache_alloc_cold(mapping);
- if (!cached_page)
- return ERR_PTR(-ENOMEM);
- }
- err = add_to_page_cache_lru(cached_page, mapping,
- index, GFP_KERNEL);
+ page = page_cache_alloc_cold(mapping);
+ if (!page)
+ return ERR_PTR(-ENOMEM);
+ err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
if (err == -EEXIST)
goto repeat;
if (err < 0) {
/* Presumably ENOMEM for radix tree node */
- page_cache_release(cached_page);
+ page_cache_release(page);
return ERR_PTR(err);
}
- page = cached_page;
- cached_page = NULL;
err = filler(data, page);
if (err < 0) {
page_cache_release(page);
page = ERR_PTR(err);
}
}
- if (cached_page)
- page_cache_release(cached_page);
return page;
}
@@ -1905,40 +1882,6 @@ retry:
EXPORT_SYMBOL(read_cache_page);
/*
- * If the page was newly created, increment its refcount and add it to the
- * caller's lru-buffering pagevec. This function is specifically for
- * generic_file_write().
- */
-static inline struct page *
-__grab_cache_page(struct address_space *mapping, unsigned long index,
- struct page **cached_page, struct pagevec *lru_pvec)
-{
- int err;
- struct page *page;
-repeat:
- page = find_lock_page(mapping, index);
- if (!page) {
- if (!*cached_page) {
- *cached_page = page_cache_alloc(mapping);
- if (!*cached_page)
- return NULL;
- }
- err = add_to_page_cache(*cached_page, mapping,
- index, GFP_KERNEL);
- if (err == -EEXIST)
- goto repeat;
- if (err == 0) {
- page = *cached_page;
- page_cache_get(page);
- if (!pagevec_add(lru_pvec, page))
- __pagevec_lru_add(lru_pvec);
- *cached_page = NULL;
- }
- }
- return page;
-}
-
-/*
* The logic we want is
*
* if suid or (sgid and xgrp)
@@ -2143,15 +2086,11 @@ generic_file_buffered_write(struct kiocb
struct inode *inode = mapping->host;
long status = 0;
struct page *page;
- struct page *cached_page = NULL;
size_t bytes;
- struct pagevec lru_pvec;
const struct iovec *cur_iov = iov; /* current iovec */
size_t iov_base = 0; /* offset in the current iovec */
char __user *buf;
- pagevec_init(&lru_pvec, 0);
-
/*
* handle partial DIO write. Adjust cur_iov if needed.
*/
@@ -2189,10 +2128,23 @@ generic_file_buffered_write(struct kiocb
*/
fault_in_pages_readable(buf, bytes);
- page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec);
- if (!page) {
- status = -ENOMEM;
- break;
+
+repeat:
+ page = find_lock_page(mapping, index);
+ if (unlikely(!page)) {
+ page = page_cache_alloc(mapping);
+ if (!page) {
+ status = -ENOMEM;
+ break;
+ }
+ status = add_to_page_cache_lru(page, mapping,
+ index, GFP_KERNEL);
+ if (status) {
+ page_cache_release(page);
+ if (status == -EEXIST)
+ goto repeat;
+ break;
+ }
}
if (unlikely(bytes == 0)) {
@@ -2264,9 +2216,6 @@ zero_length_segment:
} while (count);
*ppos = pos;
- if (cached_page)
- page_cache_release(cached_page);
-
/*
* For now, when the user asks for O_SYNC, we'll actually give O_DSYNC
*/
@@ -2286,7 +2235,6 @@ zero_length_segment:
if (unlikely(file->f_flags & O_DIRECT) && written)
status = filemap_write_and_wait(mapping);
- pagevec_lru_add(&lru_pvec);
return written ? written : status;
}
EXPORT_SYMBOL(generic_file_buffered_write);
--- linux-2.6.19-rc6-mm2.orig/fs/mpage.c
+++ linux-2.6.19-rc6-mm2/fs/mpage.c
@@ -389,33 +389,27 @@ mpage_readpages(struct address_space *ma
struct bio *bio = NULL;
unsigned page_idx;
sector_t last_block_in_bio = 0;
- struct pagevec lru_pvec;
struct buffer_head map_bh;
unsigned long first_logical_block = 0;
clear_buffer_mapped(&map_bh);
- pagevec_init(&lru_pvec, 0);
for (page_idx = 0; page_idx < nr_pages; page_idx++) {
struct page *page = list_entry(pages->prev, struct page, lru);
prefetchw(&page->flags);
list_del(&page->lru);
- if (!add_to_page_cache(page, mapping,
+ if (!add_to_page_cache_lru(page, mapping,
page->index, GFP_KERNEL)) {
bio = do_mpage_readpage(bio, page,
nr_pages - page_idx,
&last_block_in_bio, &map_bh,
&first_logical_block,
get_block);
- if (!pagevec_add(&lru_pvec, page)) {
- cond_resched();
- __pagevec_lru_add(&lru_pvec);
- }
+ cond_resched();
} else {
page_cache_release(page);
}
}
- pagevec_lru_add(&lru_pvec);
BUG_ON(!list_empty(pages));
if (bio)
mpage_bio_submit(READ, bio);
--- linux-2.6.19-rc6-mm2.orig/mm/readahead.c
+++ linux-2.6.19-rc6-mm2/mm/readahead.c
@@ -230,30 +230,24 @@ int read_cache_pages(struct address_spac
int (*filler)(void *, struct page *), void *data)
{
struct page *page;
- struct pagevec lru_pvec;
int ret = 0;
- pagevec_init(&lru_pvec, 0);
-
while (!list_empty(pages)) {
page = list_to_page(pages);
list_del(&page->lru);
- if (add_to_page_cache(page, mapping, page->index, GFP_KERNEL)) {
+ if (add_to_page_cache_lru(page, mapping,
+ page->index, GFP_KERNEL)) {
page_cache_release(page);
continue;
}
ret = filler(data, page);
- if (!pagevec_add(&lru_pvec, page)) {
- cond_resched();
- __pagevec_lru_add(&lru_pvec);
- }
- if (ret) {
+ if (unlikely(ret)) {
put_pages_list(pages);
break;
}
task_io_account_read(PAGE_CACHE_SIZE);
+ cond_resched();
}
- pagevec_lru_add(&lru_pvec);
return ret;
}
@@ -263,7 +257,6 @@ static int read_pages(struct address_spa
struct list_head *pages, unsigned nr_pages)
{
unsigned page_idx;
- struct pagevec lru_pvec;
int ret;
if (mapping->a_ops->readpages) {
@@ -273,21 +266,16 @@ static int read_pages(struct address_spa
goto out;
}
- pagevec_init(&lru_pvec, 0);
for (page_idx = 0; page_idx < nr_pages; page_idx++) {
struct page *page = list_to_page(pages);
list_del(&page->lru);
- if (!add_to_page_cache(page, mapping,
+ if (!add_to_page_cache_lru(page, mapping,
page->index, GFP_KERNEL)) {
mapping->a_ops->readpage(filp, page);
- if (!pagevec_add(&lru_pvec, page)) {
- cond_resched();
- __pagevec_lru_add(&lru_pvec);
- }
+ cond_resched();
} else
page_cache_release(page);
}
- pagevec_lru_add(&lru_pvec);
ret = 0;
out:
return ret;
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
[Index of Archives]
[Kernel Newbies]
[Netfilter]
[Bugtraq]
[Photo]
[Stuff]
[Gimp]
[Yosemite News]
[MIPS Linux]
[ARM Linux]
[Linux Security]
[Linux RAID]
[Video 4 Linux]
[Linux for the blind]
[Linux Resources]