[patch 00/12] Slab defragmentation V3

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Will show up shortly at http://ftp.kernel.org/pub/linux/kernel/people/christoph/slab-defrag/

Test results (see appended scripts / user space code for more data)

(3-level tree with 10 entries at the first level, 20 at the second, and 30 files at
the third level. Files at the lowest level were removed to create inode fragmentation.)

%Ra is the allocation ratio (need to apply the slabinfo patch to get those numbers)

inode reclaim in reiserfs

Name                   Objects Objsize    Space Slabs/Part/Cpu  O/S O %Ra %Ef Flg
dentry                   14660     200     3.0M        733/0/1   20 0 100  97 Da
reiser_inode_cache        1596     640     4.1M      256/201/1   25 2  24  24 DCa

Status after defrag

Name                   Objects Objsize    Space Slabs/Part/Cpu  O/S O %Ra %Ef Flg
dentry                    8849     200     1.8M       454/17/1   20 0  97  95 Da
reiser_inode_cache        1381     640     1.0M        65/11/0   25 2  84  82 DCa



Slab defragmentation can be triggered in two ways:

1. Manually by running

slabinfo -s <slabs-to-shrink>

or manually by the kernel calling

kmem_cache_shrink(slab)

(Currently only ACPI is doing such a call to a slab that has no
defragmentation support. In that case we simply do what SLAB does:
drop per cpu caches and sift through partial list for free slabs).

2. Automatically if defragmentable slabs reach a certain degree of
   fragmentation.

The point at which slab defragmentation occurs can be set at

/proc/sys/vm/slab_defrag_ratio

Slab fragmentation is measured by how many of the possible objects in a
slab are in use. The default setting for slab_defrag_ratio is 30%. This
means that slab defragmentation is going to be triggered if there are more than
3 free object slots for each allocated object.

Setting the slab_defrag_ratio higher will cause more defragmentation runs.
If slab_defrag_ratio is set to 0 then no slab defragmentation occurs.

Slabs are checked for their fragmentation levels after the slabs have been shrunk
by running the shrinkers in mm/vmscan.c during memory reclaim. This means that slab
defragmentation is only triggered if we are under memory pressure and if there is
significant slab fragmentation.

V1->V2
- Clean up control flow using a state variable. Simplify API. Back to 2
  functions that now take arrays of objects.
- Inode defrag support for a set of filesystems
- Fix up dentry defrag support to work on negative dentries by adding
  a new dentry flag that indicates that a dentry is not in the process
  of being freed or allocated.

V2->V3
- Support directory reclaim
- Add infrastructure to trigger slab defrag after slab shrinking if we
  have slabs with a high degree of fragmentation.



Test script:

#!/bin/sh
# Build a file tree, delete its leaf files to fragment the slabs, then
# shrink the slabs and print slab statistics at each step.

# Set the fragmentation threshold at which slab defragmentation triggers.
echo 30 >/proc/sys/vm/slab_defrag_ratio

# Create a 3 level tree with 10, 20 and 30 entries per level.
./gazfiles c 3 10 20 30
echo "Status before"
# NOTE(review): -D presumably prints the slab statistics table -- confirm
# against the slabinfo tool.
slabinfo -D
# Remove everything below level 2, i.e. the files at the third level.
./gazfiles d 2
echo "Status after removing files"
slabinfo -D
# Manually trigger slab shrinking / defragmentation.
slabinfo -s
echo "Status after defrag"
slabinfo -D
# Level 0 removes the whole gazf tree.
./gazfiles d 0


gazfiles.c :

/*
 * Create a gazillion of files to be able to create slab fragmentation
 *
 * (C) 2007 sgi, Christoph Lameter <[email protected]>
 *
 * Create an n-layered hierarchy of directories and files
 *
 * gazfiles <action> <levels> <n1> <n2> ...
 *
 * gazfiles c[reate] 3 50 50 50
 *
 * gazfiles s[hrink] <levels>
 *
 * gazfiles r[andomkill] <nr to kill>
 *
 * gazfiles d[elete] <levels>
 */

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <dirent.h>
#include <string.h>
#include <unistd.h>
#include <stdarg.h>
#include <getopt.h>
#include <regex.h>
#include <errno.h>

/* Capacity of sizes[]: the largest number of tree levels that can be stored. */
#define MAXIMUM_LEVELS 10

/* Number of levels in the tree; assigned by create() and read by cre(). */
int level;
/* sizes[i] is the number of entries created in each directory at level i + 1. */
int sizes[MAXIMUM_LEVELS];

/*
 * Report an unrecoverable error and terminate.
 *
 * x and the following arguments form a printf-style message that is
 * written to stderr. The process then exits with status 1; this function
 * does not return.
 */
void fatal(const char *x, ...)
{
        va_list args;

        va_start(args, x);
        (void)vfprintf(stderr, x, args);
        va_end(args);

        exit(1);
}

/*
 * Load the tree description from ".gazinfo" in the current directory.
 *
 * The file holds the number of levels followed by one entry count per
 * level. On success the global "level" is set and sizes[0..level-1] are
 * filled in. On failure "level" is left untouched, although sizes[] may
 * have been partially overwritten.
 *
 * Returns 1 on success, 0 if the file cannot be opened, is truncated or
 * malformed, or holds a level count outside 0..MAXIMUM_LEVELS.
 */
int read_gaz(void)
{
	FILE *f = fopen(".gazinfo", "r");
	int rc = 0;
	int n;
	int i;

	if (!f)
		return 0;

	/*
	 * fscanf() returns EOF (which is non-zero) on an empty or truncated
	 * file, so require exactly one converted item instead of testing
	 * the result for zero.
	 */
	if (fscanf(f, "%d", &n) != 1)
		goto out;

	/* sizes[] has MAXIMUM_LEVELS slots, so that many levels still fit. */
	if (n < 0 || n > MAXIMUM_LEVELS)
		goto out;

	for (i = 0; i < n; i++)
		if (fscanf(f, " %d", &sizes[i]) != 1)
			goto out;

	level = n;
	rc = 1;
out:
	fclose(f);
	return rc;
}

/*
 * Record the tree description in ".gazinfo" in the current directory.
 *
 * Writes the global "level" followed by sizes[0..level-1], separated by
 * spaces and terminated by a newline. Prints a diagnostic and exits with
 * status 1 if the file cannot be created or written.
 */
void write_gaz(void)
{
	FILE *f = fopen(".gazinfo", "w");
	int i;

	if (!f) {
		perror(".gazinfo");
		exit(1);
	}

	fprintf(f, "%d", level);
	for (i = 0; i < level; i++)
		fprintf(f, " %d", sizes[i]);
	fprintf(f, "\n");

	/* Buffered write errors only surface when the stream is flushed. */
	if (fclose(f)) {
		perror(".gazinfo");
		exit(1);
	}
}

/*
 * Recursively populate the current directory for tree level l (1-based).
 *
 * Creates sizes[l - 1] entries named "000", "001", ... Below the last
 * level each entry is a directory that is entered and populated for level
 * l + 1; at the last level each entry is a file containing "Test".
 *
 * Any failure to create or enter an entry is fatal: carrying on after a
 * failed chdir() would let the following chdir("..") climb out of the tree
 * and create files in the wrong place.
 */
void cre(int l)
{
	int i;

	for (i = 0; i < sizes[l - 1]; i++) {
		char name[20];

		sprintf(name, "%03d", i);

		if (l < level) {
			if (mkdir(name, 0775) || chdir(name)) {
				perror(name);
				exit(1);
			}
			cre(l + 1);
			if (chdir("..")) {
				perror("..");
				exit(1);
			}
		} else {
			FILE *f;

			f = fopen(name, "w");
			if (!f) {
				perror(name);
				exit(1);
			}
			fprintf(f, "Test");
			if (fclose(f)) {
				perror(name);
				exit(1);
			}
		}
	}
}

void create(int l, char **sz)
{
	int i;

	level = l;
	for (i = 0; i < level; i++)
		sizes[i] = atoi(sz[i]);

	if (mkdir("gazf", 0775))
		fatal("Cannot create gazf here\n");
	chdir("gazf");
	write_gaz();
	cre(1);
	chdir("..");
}

/*
 * Handler for the "shrink" action.
 *
 * Currently this only enters gazf, calls read_gaz() and steps back out.
 * The level parameter (which shadows the global of the same name inside
 * this function) is not used. Exits through fatal() if gazf cannot be
 * entered.
 */
void shrink(int level)
{
	int entered = (chdir("gazf") == 0);

	(void)level;

	if (!entered)
		fatal("No gazfiles in this directory");

	read_gaz();
	chdir("..");
}

/*
 * Walk the current directory depth-first and call func for every entry.
 *
 * l     - tree level of the entries in the current directory.
 * func  - callback invoked as func(l, is_dir, name, level) with the
 *         entry's parent as the current directory. Directories are
 *         reported after their contents have been visited.
 * level - opaque value handed through to func.
 *
 * Entries whose name starts with '.' are skipped. Exits through fatal()
 * if a directory cannot be opened, entered or left again.
 */
void scand(int l, void (*func)(int, int, char *, unsigned long),
			unsigned long level)
{
	DIR *dir;
	struct dirent *de;

	dir = opendir(".");
	if (!dir)
		fatal("Cannot open directory");
	while ((de = readdir(dir))) {
		struct stat s;
		int is_dir;

		if (de->d_name[0] == '.')
			continue;

		/*
		 * readdir() does not fill in d_type reliably on every
		 * filesystem, so also consult stat(). If stat() fails, fall
		 * back to whatever d_type says instead of reading an
		 * uninitialized struct stat.
		 */
		is_dir = (de->d_type == DT_DIR);
		if (stat(de->d_name, &s) == 0 && S_ISDIR(s.st_mode))
			is_dir = 1;

		if (is_dir) {
			if (chdir(de->d_name))
				fatal("Cannot enter %s", de->d_name);
			scand(l + 1, func, level);
			/*
			 * Continuing from the wrong directory would apply
			 * func to unrelated entries.
			 */
			if (chdir(".."))
				fatal("Cannot leave %s", de->d_name);
			func(l, 1, de->d_name, level);
		} else {
			func(l, 0, de->d_name, level);
		}
	}
	closedir(dir);
}

/*
 * Apply func to every entry beneath ./gazf.
 *
 * Enters gazf, runs scand() starting at level 1 with func and level
 * passed through unchanged, then returns to the parent directory.
 * Exits through fatal() if gazf cannot be entered.
 */
void traverse(void (*func)(int, int, char *, unsigned long),
		unsigned long level)
{
	if (chdir("gazf") != 0)
		fatal("No gazfiles in this directory");

	scand(1, func, level);

	chdir("..");
}

/*
 * Handler for the "randomkill" action.
 *
 * Currently this only enters gazf, calls read_gaz() and steps back out;
 * nr is not used. Exits through fatal() if gazf cannot be entered.
 */
void randomkill(int nr)
{
	int missing = (chdir("gazf") != 0);

	(void)nr;

	if (missing)
		fatal("No gazfiles in this directory");

	read_gaz();
	chdir("..");
}

/*
 * scand() callback that removes entries located deeper than a given level.
 *
 * l     - level of the entry being visited.
 * dir   - non-zero if the entry is a directory, zero otherwise.
 * name  - entry name, relative to the current directory.
 * level - entries at this level or above are kept.
 *
 * Exits through fatal() if the entry cannot be removed.
 */
void del_func(int l, int dir, char *name, unsigned long level)
{
	if (l <= level)
		return;
	if (dir) {
		/* The %s needs its argument; omitting it is undefined. */
		if (rmdir(name))
			fatal("Cannot remove directory %s", name);
	} else {
		if (unlink(name))
			fatal("Cannot unlink file %s", name);
	}
}

/*
 * Handler for the "delete" action.
 *
 * With l == 0 the whole gazf tree is removed via "rm -rf". Otherwise the
 * tree is traversed with del_func, which removes every entry deeper than
 * level l.
 */
void delete(int l)
{
	if (l != 0) {
		traverse(del_func, l);
		return;
	}

	system("rm -rf gazf");
}

/*
 * Print the command summary to stdout and exit with status 0.
 * Does not return.
 */
void usage(void)
{
	static const char *const help[] = {
		"gazfiles: Tool to manage gazillions of files\n\n",
		"gazfiles create <levels> <#l1> <#l2> ...\n",
		"gazfiles delete <levels>\n",
		"gazfiles shrink <levels>\n",
		"gazfiles randomkill <nr>\n\n",
		"(C) 2007 sgi, Christoph Lameter <[email protected]>\n",
	};
	size_t i;

	for (i = 0; i < sizeof(help) / sizeof(help[0]); i++)
		fputs(help[i], stdout);

	exit(0);
}

/*
 * Entry point: dispatch on the first character of the action argument.
 *
 *   c <levels> <n1> <n2> ...   create a tree
 *   s <levels>                 shrink
 *   r <nr>                     randomkill
 *   d <levels>                 delete
 *
 * Missing or malformed arguments print the usage text and exit.
 */
int main(int argc, char *argv[])
{
	if (argc  <  2)
		usage();

	switch (argv[1][0]) {
		case 'c' : {
			int levels;

			/*
			 * argv[2] must exist before it is parsed, and one
			 * size argument is needed for every level.
			 */
			if (argc < 3)
				usage();

			levels = atoi(argv[2]);
			if (levels > argc - 3)
				usage();

			create(levels, argv + 3);
			break;
		}
		case 's' :
			if (argc != 3)
				usage();

			shrink(atoi(argv[2]));
			break;
		case 'r' :
			if (argc != 3)
				usage();

			randomkill(atoi(argv[2]));
			break;
		case 'd':
			if (argc != 3)
				usage();
			delete(atoi(argv[2]));
			break;

		default:
			usage();
	}
	return 0;
}
-- 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux