Will show up shortly at http://ftp.kernel.org/pub/linux/kernel/people/christoph/slab-defrag/
Test results (see appended scripts / user space code for more data)
(3 level tree with 10 entries at the first level, 20 at the second and 30 files at the
third level. Files at the lowest level were removed to create inode fragmentation)
%Ra is the allocation ratio (need to apply the slabinfo patch to get those numbers)
inode reclaim in reiserfs
Name Objects Objsize Space Slabs/Part/Cpu O/S O %Ra %Ef Flg
dentry 14660 200 3.0M 733/0/1 20 0 100 97 Da
reiser_inode_cache 1596 640 4.1M 256/201/1 25 2 24 24 DCa
Status after defrag
Name Objects Objsize Space Slabs/Part/Cpu O/S O %Ra %Ef Flg
dentry 8849 200 1.8M 454/17/1 20 0 97 95 Da
reiser_inode_cache 1381 640 1.0M 65/11/0 25 2 84 82 DCa
Slab defragmentation can be triggered in two ways:
1. Manually by running
slabinfo -s <slabs-to-shrink>
or manually by the kernel calling
kmem_cache_shrink(slab)
(Currently only ACPI is doing such a call to a slab that has no
defragmentation support. In that case we simply do what SLAB does:
drop per cpu caches and sift through partial list for free slabs).
2. Automatically if defragmentable slabs reach a certain degree of
fragmentation.
The point at which slab defragmentation occurs can be set at
/proc/sys/vm/slab_defrag_ratio
Slab fragmentation is measured by how many of the possible objects in a
slab are in use. The default setting for slab_defrag_ratio is 30%. This
means that slab defragmentation is going to be triggered if there are more than
3 free object slots for each allocated object.
Setting the slab_defrag_ratio higher will cause more defragmentation runs.
If slab_defrag_ratio is set to 0 then no slab defragmentation occurs.
Slabs are checked for their fragmentation levels after the slabs have been shrunk
by running shrinkers in mm/vmscan.c during memory reclaim. This means that slab
defragmentation is only triggered if we are under memory pressure and if there is
significant slab fragmentation.
V1->V2
- Clean up control flow using a state variable. Simplify API. Back to 2
functions that now take arrays of objects.
- Inode defrag support for a set of filesystems
- Fix up dentry defrag support to work on negative dentries by adding
a new dentry flag that indicates that a dentry is not in the process
of being freed or allocated.
V2->V3
- Support directory reclaim
- Add infrastructure to trigger slab defrag after slab shrinking if we
have slabs with a high degree of fragmentation.
Test script:
#!/bin/sh
# Set the defragmentation threshold to 30 (the default described above).
echo 30 >/proc/sys/vm/slab_defrag_ratio
# Create a 3 level tree: 10 entries, 20 per entry below that, 30 files each.
./gazfiles c 3 10 20 30
echo "Status before"
slabinfo -D
# Delete everything below level 2, i.e. the files at the third level.
./gazfiles d 2
echo "Status after removing files"
slabinfo -D
# Manually trigger slab shrinking / defragmentation.
slabinfo -s
echo "Status after defrag"
slabinfo -D
# Level 0 removes the whole gazf tree.
./gazfiles d 0
gazfiles.c :
/*
* Create a gazillion of files to be able to create slab fragmentation
*
* (C) 2007 sgi, Christoph Lameter <[email protected]>
*
* Create an n-layered hierarchy of empty files
*
* gazfiles <action> <levels> <n1> <n2> ...
*
* gazfiles c[reate] 3 50 50 50
*
* gazfiles s[hrink] <levels>
*
* gazfiles d[elete] <levels>
*
* gazfiles r[andomkill] <nr to kill>
*/
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <dirent.h>
#include <string.h>
#include <unistd.h>
#include <stdarg.h>
#include <getopt.h>
#include <regex.h>
#include <errno.h>
/* Number of slots in sizes[], i.e. the deepest hierarchy that can be described. */
#define MAXIMUM_LEVELS 10
/* Number of levels in the current hierarchy; set by create() and read_gaz(). */
int level;
/* sizes[i] is the number of entries created at level i + 1 (see cre()). */
int sizes[MAXIMUM_LEVELS];
/*
 * Report an unrecoverable error and terminate.
 *
 * x and the following arguments form a printf-style message that is written
 * to stderr exactly as given (no newline is appended). The process then
 * exits with status 1, so this function never returns.
 */
void fatal(const char *x, ...)
{
	va_list args;

	va_start(args, x);
	(void)vfprintf(stderr, x, args);
	va_end(args);

	exit(1);
}
/*
 * Load the hierarchy description from ".gazinfo" in the current directory
 * into the globals level and sizes[].
 *
 * The file is expected to hold the level count followed by one entry count
 * per level, which is the layout write_gaz() produces.
 *
 * Returns 1 on success. Returns 0 if the file cannot be opened, is empty,
 * truncated or malformed, or if the level count does not fit into sizes[].
 */
int read_gaz(void)
{
	FILE *f = fopen(".gazinfo", "r");
	int rc = 0;
	int i;

	if (!f)
		return 0;

	/*
	 * fscanf() returns EOF, which is non-zero, when the input ends before
	 * the first conversion. Compare against the number of conversions
	 * expected rather than testing for zero, otherwise an empty file
	 * would be accepted.
	 */
	if (fscanf(f, "%d", &level) != 1)
		goto out;

	/*
	 * sizes[] has MAXIMUM_LEVELS slots, so exactly MAXIMUM_LEVELS levels
	 * still fit. A negative count can never be valid.
	 */
	if (level < 0 || level > MAXIMUM_LEVELS)
		goto out;

	for (i = 0; i < level; i++)
		if (fscanf(f, " %d", &sizes[i]) != 1)
			goto out;

	rc = 1;
out:
	fclose(f);
	return rc;
}
/*
 * Store the hierarchy description (the globals level and sizes[]) in
 * ".gazinfo" in the current directory.
 *
 * The file consists of a single line: the level count followed by one
 * entry count per level, separated by spaces.
 *
 * Terminates the program through fatal() if the file cannot be created or
 * written.
 */
void write_gaz(void)
{
	FILE *f = fopen(".gazinfo", "w");
	int i;

	/* Without this check a failed open would pass NULL to fprintf(). */
	if (!f)
		fatal("Cannot create .gazinfo\n");

	fprintf(f, "%d", level);
	for (i = 0; i < level; i++)
		fprintf(f, " %d", sizes[i]);
	fprintf(f, "\n");

	/* fclose() flushes the buffered output, so write errors surface here. */
	if (fclose(f))
		fatal("Cannot write .gazinfo\n");
}
/*
 * Recursively populate the current directory with the entries of level l
 * (1-based).
 *
 * sizes[l - 1] entries named "000", "001", ... are created. Above the
 * deepest level they are directories, each of which is entered and
 * populated with the next level. At the deepest level they are regular
 * files containing the text "Test".
 *
 * Failures terminate the program through fatal(): continuing after a failed
 * chdir() would build the rest of the tree in the wrong directory, and a
 * failed fopen() would hand a NULL stream to fprintf().
 */
void cre(int l)
{
	int i;

	for (i = 0; i < sizes[l - 1]; i++) {
		char name[20];

		snprintf(name, sizeof(name), "%03d", i);

		if (l < level) {
			/* An already existing directory is simply reused. */
			if (mkdir(name, 0775) && errno != EEXIST)
				fatal("Cannot create directory %s\n", name);
			if (chdir(name))
				fatal("Cannot enter %s\n", name);
			cre(l + 1);
			if (chdir(".."))
				fatal("Cannot leave %s\n", name);
		} else {
			FILE *f = fopen(name, "w");

			if (!f)
				fatal("Cannot create file %s\n", name);
			fprintf(f, "Test");
			if (fclose(f))
				fatal("Cannot write file %s\n", name);
		}
	}
}
void create(int l, char **sz)
{
int i;
level = l;
for (i = 0; i < level; i++)
sizes[i] = atoi(sz[i]);
if (mkdir("gazf", 0775))
fatal("Cannot create gazf here\n");
chdir("gazf");
write_gaz();
cre(1);
chdir("..");
}
/*
 * Placeholder for the "shrink" action.
 *
 * Enters gazf, loads the stored hierarchy description and returns to the
 * parent directory. Nothing is removed and the level argument (which
 * shadows the global of the same name) is not used.
 */
void shrink(int level)
{
	int rc = chdir("gazf");

	if (rc != 0)
		fatal("No gazfiles in this directory");

	read_gaz();
	chdir("..");
}
/*
 * Walk the current directory depth first and call func for every entry
 * whose name does not start with '.'.
 *
 * l     - depth of the entries in the current directory (traverse() starts
 *         at 1)
 * func  - callback, invoked as func(depth, is_dir, name, level). For a
 *         directory it is called after the directory's contents have been
 *         visited, with the directory's parent as the working directory.
 * level - value handed through unchanged to func
 *
 * Terminates the program through fatal() if a directory cannot be opened,
 * entered or left.
 */
void scand(int l, void (*func)(int, int, char *, unsigned long),
		unsigned long level)
{
	DIR *dir;
	struct dirent *de;

	dir = opendir(".");
	if (!dir)
		fatal("Cannot open directory");

	while ((de = readdir(dir))) {
		struct stat s;
		int is_dir;

		if (de->d_name[0] == '.')
			continue;

		/*
		 * The d_type reported by readdir() is not filled in on every
		 * filesystem, so ask stat() for the file type instead. st_mode
		 * is only meaningful when stat() succeeded; an entry that
		 * cannot be examined is handed to func as a non-directory.
		 */
		is_dir = stat(de->d_name, &s) == 0 && S_ISDIR(s.st_mode);

		if (is_dir) {
			if (chdir(de->d_name))
				fatal("Cannot enter %s", de->d_name);
			scand(l + 1, func, level);
			/* Continuing from the wrong directory would be unsafe. */
			if (chdir(".."))
				fatal("Cannot leave %s", de->d_name);
			func(l, 1, de->d_name, level);
		} else {
			func(l, 0, de->d_name, level);
		}
	}
	closedir(dir);
}
/*
 * Run func over the whole gazf tree.
 *
 * Enters gazf (fatal if that fails), hands func and level to scand()
 * starting at depth 1, then returns to the parent directory.
 */
void traverse(void (*func)(int, int, char *, unsigned long),
		unsigned long level)
{
	int rc = chdir("gazf");

	if (rc != 0)
		fatal("No gazfiles in this directory");

	scand(1, func, level);
	chdir("..");
}
/*
 * Placeholder for the "randomkill" action.
 *
 * Enters gazf, loads the stored hierarchy description and returns to the
 * parent directory. No file is removed and nr is not used.
 */
void randomkill(int nr)
{
	if (chdir("gazf") == 0) {
		read_gaz();
		chdir("..");
		return;
	}

	/* Does not return. */
	fatal("No gazfiles in this directory");
}
/*
 * scand() callback that removes every entry deeper than a cut-off depth.
 *
 * l     - depth of the entry
 * dir   - non-zero if the entry is a directory
 * name  - entry name, relative to the current directory
 * level - cut-off: entries with l <= level are kept
 *
 * Terminates the program through fatal() if the entry cannot be removed.
 */
void del_func(int l, int dir, char *name, unsigned long level)
{
	/* Same conversion the mixed-sign comparison performed implicitly. */
	if ((unsigned long)l <= level)
		return;

	/* The %s in these messages needs name; it used to be omitted. */
	if (dir) {
		if (rmdir(name))
			fatal("Cannot remove directory %s", name);
	} else {
		if (unlink(name))
			fatal("Cannot unlink file %s", name);
	}
}
/*
 * Remove part or all of the gazf tree.
 *
 * l == 0 removes the whole tree with "rm -rf gazf". Any other value walks
 * the tree with del_func, using l as the cut-off depth.
 */
void delete(int l)
{
	if (l != 0) {
		traverse(del_func, l);
		return;
	}

	system("rm -rf gazf");
}
/*
 * Print the command synopsis to stdout and exit with status 0.
 * Never returns.
 */
void usage(void)
{
	fputs("gazfiles: Tool to manage gazillions of files\n\n"
	      "gazfiles create <levels> <#l1> <#l2> ...\n"
	      "gazfiles delete <levels>\n"
	      "gazfiles shrink <levels>\n"
	      "gazfiles randomkill <nr>\n\n"
	      "(C) 2007 sgi, Christoph Lameter <[email protected]>\n",
	      stdout);
	exit(0);
}
/*
 * Entry point: dispatch on the first character of the action argument.
 *
 *   c  create <levels> <n1> <n2> ...   one size argument per level
 *   s  shrink <levels>
 *   r  randomkill <nr>
 *   d  delete <levels>
 *
 * Anything else, or a wrong number of arguments, prints the usage text,
 * which exits the program.
 */
int main(int argc, char *argv[])
{
	if (argc < 2)
		usage();

	switch (argv[1][0]) {
	case 'c': {
		int levels;

		/* Without a level count argv[2] is NULL. */
		if (argc < 3)
			usage();
		levels = atoi(argv[2]);
		/* create() reads one size argument per level. */
		if (argc - 3 < levels)
			usage();
		create(levels, argv + 3);
		break;
	}
	case 's':
		if (argc != 3)
			usage();
		shrink(atoi(argv[2]));
		break;
	case 'r':
		if (argc != 3)
			usage();
		randomkill(atoi(argv[2]));
		break;
	case 'd':
		if (argc != 3)
			usage();
		delete(atoi(argv[2]));
		break;
	default:
		usage();
	}
	return 0;
}
--
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
[Index of Archives]
[Kernel Newbies]
[Netfilter]
[Bugtraq]
[Photo]
[Stuff]
[Gimp]
[Yosemite News]
[MIPS Linux]
[ARM Linux]
[Linux Security]
[Linux RAID]
[Video 4 Linux]
[Linux for the blind]
[Linux Resources]