checkpointing and restoring processes

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



hi everyone!

i'm not subscribed to the list, so if you care to flame because of my noob 
question, just do it to the list, otherwise please cc me.

i'm trying to write a checkpoint/restore module for processes and so have 
a basic version going already - problem is, when i restore the process, 
one of three things happens at random. first is, the process restored 
segfaults. second is, i get a kernel null pointer dereference and third 
is, i get a virtual address lookup error and a kernel crash. the trace 
back and the address always change.

the user space process is as simple as i could make it: (error checking 
and debugging messages are left out)


void take_chkpt(void) {
        pid_t pid;
        char call_pid[10];
        char call_num[10];

        chkptpid = getpid();
        snprintf(call_pid, 9, "%d", chkptpid);
        snprintf(call_num, 9, "%d", checkpointnum);

	switch(pid = fork()) {
	case -1:
                fprintf(stderr, "Fork failed.\n");
                return;
                break;
	case  0:   /* child process */
                if(!execl("child_take", call_pid, call_num, (char *)0))
                        perror("execl: ");
                break;
	default:   /* parent process */
                waitpid(pid, NULL, 0);
                break;
	}

        return;
}


void restore_chkpts(void) {
        pid_t pid;
        char call_pid[10];
        char call_num[10];

	ENTERFUN();

        if(restore_retry) // do nothing on second call to restore
                return;

        chkptpid = getpid();
        snprintf(call_pid, 9, "%d", chkptpid);
        snprintf(call_num, 9, "%d", checkpointnum);

	switch(pid = fork()) {
	case -1:
                fprintf(stderr, "MP: Fork failed.\n");
                return;
                break;
	case  0:   /* child process */
                if(!execl("child_restore", call_pid, call_num, (char *)0))
                        perror("execl: ");
                break;
	default:   /* parent process */
                INF(("Parent Process"));
                restore_retry=1;
                INF(("Wait for Child..."));
                waitpid(pid, NULL, 0);
                break;
	}

	LEAVEFUN();

        return;
}

int main(int argc, char* argv[]) {
	take_chkpt();
	printf("Hello cruel world!\n");
	restore_chkpts();
	return 0;
}

where child_take and child_restore do the following:


void child_take_chkpt(int chkptpid, int checkpointnum) {
        struct chkpt_ioctl chkptio;
        int dev_fd; // ioctl device file
        char chkptname[30];

        if ((dev_fd = open(CHKPT_DEVICE, O_RDWR)) < 0) {
                perror("MP: Open device file");
                exit(EXIT_FAILURE);
        }
        chkptio.pid = chkptpid;
        snprintf(chkptname, 29, "/tmp/chkpt_%d_%d", chkptio.pid, checkpointnum);
        chkptio.file = creat(chkptname, 00755);
        sleep(1); // to go sure the parent process is in waitpid -- ugly, 
but works
        kill(chkptio.pid, SIGSTOP);
        sleep(1);
        ioctl(dev_fd, CHKPT_IOCTL_SAVE, (unsigned long)&chkptio);
        close(dev_fd);
        close(chkptio.file);
        kill(chkptio.pid, SIGCONT);
        exit(0);
}

void child_restore_chkpts(int chkptpid, int checkpointnum) {
        struct chkpt_ioctl chkptio;
        int dev_fd; // ioctl device file
        char chkptname[30];

        snprintf(chkptname, 29, "/tmp/chkpt_%d_%d", chkptpid, checkpointnum-1);
        chkptio.file = open(chkptname, O_RDONLY);
        chkptio.pid = chkptpid;
        dev_fd = open(CHKPT_DEVICE, O_RDWR);
        sleep(1);
        kill(chkptpid, SIGSTOP);
        sleep(1);
        ioctl(dev_fd, CHKPT_IOCTL_RESTORE, (unsigned long)&chkptio);
        close(chkptio.file);
        close(dev_fd);
        kill(chkptpid, SIGCONT);
        exit(0);
}

the header for the files is this:


enum {
        CHKPT_IOCTL_SAVE,
        CHKPT_IOCTL_RESTORE
};

struct chkpt_ioctl {
        pid_t pid; // for fork tests
        int file;
};

struct chkpt {
        pid_t pid; // for fork tests
        struct pt_regs regs;
        unsigned int datasize;
        unsigned int brksize;
        unsigned int stacksize;
};


and finally the kernel module:

int chkpt_ioctl_handler(struct inode *i, struct file *f,
                     unsigned int cmd, unsigned long arg) {
        struct chkpt_ioctl pmio, *u_pmio;
        int ret = -1;

        u_pmio = (struct chkpt_ioctl *)arg;

        switch(cmd) {
        case CHKPT_IOCTL_SAVE:
                if (copy_from_user(&pmio, u_pmio, sizeof(struct 
chkpt_ioctl))) {
                        printk("...failed to copy from user\n");
                        ret = -1;
                        break;
                }
                if(chkpt_save(&pmio) < 0) {
                        printk("...failed to save chkpt\n");
                        ret = -1;
                        break;
                }
                ret = 0;
                break;
        case CHKPT_IOCTL_RESTORE:
                INFO(("CHKPT_IOCTL_RESTORE"));
                if (copy_from_user(&pmio, u_pmio, sizeof(struct 
chkpt_ioctl))) {
                        printk("...failed to copy from user\n");
                        ret = -1;
                        break;
                }
                if (chkpt_restore(&pmio) < 0) {
                        printk("...failed to restore chkpt\n");
                        ret = -1;
                        break;
                }        
                ret = 0;
                break;
        default:
                printk("...illegal ioctl cmd\n");
                ret = -1;
                break;
        }
        return ret;
}

static int chkpt_save(struct chkpt_ioctl *chkptio) {
        struct task_struct *tsk;
        struct chkpt chkpt;
        unsigned int datasz, brksz, stacksz;
        struct file *f;

        if (!(tsk = find_task_by_pid(chkptio->pid))) {
                printk("...task %d not found\n", chkptio->pid);
                return -1;
        }

        f = current->files->fd[chkptio->file];

        datasz = tsk->mm->end_data - tsk->mm->start_data; // data
        brksz = tsk->mm->brk - tsk->mm->start_brk; // brk
        stacksz = tsk->thread.esp0 - tsk->thread.esp; // stack

        /* saving most important information belonging to tsk */
        /* NO FILES, SOCKETS, PIPES, SHARED MEMORY AND SEMAPHORES */
        chkpt.pid = chkptio->pid;
        /* REGISTERS */
        memcpy(&chkpt.regs, REGS, sizeof(struct pt_regs));
        if (in_syscall(tsk))
                intr_syscall(&chkpt.regs);
        chkpt.datasize = datasz;
        chkpt.brksize = brksz;
        chkpt.stacksize = stacksz;
        pack_write(f, (void *)&chkpt, sizeof(struct chkpt), 0);
        /* TASK */
        pack_write(f, (void*)tsk, THREAD_SIZE, 0);
        /* MEMORY */
        pack_write(f, (void *)tsk->mm->start_data, datasz, 0);
        pack_write(f, (void *)tsk->mm->start_brk, brksz, 0);
        pack_write(f, (void *)tsk->thread.esp, stacksz, 0);

        pack_write(f, NULL, 0, 1); /* last packet */

        return 0;
}

static int pack_write (struct file *f, char *buf, int len,
		       int last_pkt) {
    static char *pack = NULL;
    static long pos = 0;
    int ret, to_copy, wrtn = 0;
    
    if (pack==NULL)
    {
	pack=(char*)kmalloc(PACKET_SIZE, GFP_KERNEL);
	if (!pack)
	{
	    printk("pack_write: no mem!\n");
	    return -1;
	}
    }
    
    while (len>0)
    {
	to_copy = (len>(PACKET_SIZE-pos))?(PACKET_SIZE-pos):(len);
	
        memcpy(&(pack[pos]), buf+wrtn, to_copy);
	
	pos += to_copy;
	len -= to_copy;
	wrtn +=to_copy; 
	
	if ( (pos==PACKET_SIZE) || (last_pkt) )
	{
	    mm_segment_t fs = get_fs();
	    
	    set_fs(KERNEL_DS);
	    ret = f->f_op->write(f, pack, pos, &(f->f_pos));	
	    set_fs(fs);
	    if (ret!=pos)
		return ret;
	    
	    pos = 0;
	    if (last_pkt)
	    {
		kfree(pack);
		pack = NULL;
	    }
	}
    }
    
    if ( (last_pkt) && (pack!=NULL) )
    {
	if (pos!=0)
	{
	    mm_segment_t fs = get_fs();
	    
	    set_fs(KERNEL_DS);
	    wrtn = f->f_op->write(f, pack, pos, &f->f_pos);
	    set_fs(fs);
	}
	kfree(pack);
	pack = NULL;
	pos = 0;
    }
    
    return wrtn;
}

static int in_syscall(struct task_struct *tsk) {
        unsigned char ins, opc;
        long ret;
        unsigned long flags;
        struct pt_regs *regs;

        spin_lock_irqsave(&runqueue_lock, flags);

        regs = (((struct pt_regs *)(THREAD_SIZE + (unsigned long)tsk)) - 
1);

        get_user(ins, (unsigned char *)(regs->eip) - 2);
        get_user(opc, (unsigned char *)(regs->eip) - 1);

        if ((ins == 0xCD) && (opc == 0x80)) {
                ret = regs->orig_eax;
        } else {
                ret = 0;
        }
        
        if (ret && ((regs->orig_eax < 0) || (regs->orig_eax > 
NR_syscalls))) {
                INFO(("syscall number out of bounds %ld\n", 
regs->orig_eax));
                ret = 0;
        }
        
        spin_unlock_irqrestore(&runqueue_lock, flags);

        return ret;
}

static int intr_syscall(struct pt_regs *regs) {
        
        /* handle in_syscall depending on syscall number */
        switch(regs->orig_eax) {
        case 4: /* write */
                /* report interuption */
                regs->eax = -EINTR;
                break;
        default:
                /* restart */
                regs->eax = regs->orig_eax;
                regs->eip -= 2;
                break;
        }

        return regs->orig_eax;
}

static int chkpt_restore(struct chkpt_ioctl *chkptio) {
        struct task_struct *tsk, *saved_tsk;
        struct chkpt chkpt;
        struct file *f;

        if ((tsk = find_task_by_pid(chkptio->pid)) == NULL) {
                printk("...failed task not found %u\n", chkptio->pid);
                return -1;
        }


        if ((saved_tsk = kmalloc(sizeof(struct task_struct), GFP_KERNEL)) 
== NULL) {
                printk("kmalloc failed\n");
                return -1;
        }

        f = current->files->fd[chkptio->file];
        do_read(f, &f->f_pos, (void *)&chkpt, sizeof(struct chkpt));
        do_read(f, &f->f_pos, (void *)saved_tsk, THREAD_SIZE);

        /* TASK */
        if (pm_overwrite(tsk, saved_tsk) == NULL) {
                printk("...failed task overwrite\n");
                return -1;
        }
        /* REGISTERS */
        memcpy(REGS, &chkpt.regs, sizeof(struct pt_regs));
        /* MEMORY */
        do_read(f, &f->f_pos, (void *)tsk->mm->start_data, 
chkpt.datasize);
        do_read(f, &f->f_pos, (void *)chkpt.regs.esp, chkpt.stacksize);
        do_read(f, &f->f_pos, (void *)tsk->mm->start_brk, chkpt.brksize);
        kfree(saved_tsk);

        return 0;
}


static int inline do_read(struct file *file, loff_t *offset,
			  char * addr, unsigned long count) {
    mm_segment_t old_fs;
    int ret;
    
    if (!file->f_op->read)
	return -ENOSYS;
    old_fs = get_fs();
    set_fs(get_ds());
    ret = file->f_op->read(file, addr, count, offset);
    set_fs(old_fs);
    return ret;
}

/* overwrite tsk with the one in pmio, making a backup first,
   then restoring all values dependant on the current local
   OS state (except register values) */
static struct task_struct *pm_overwrite(struct task_struct *tsk,
                                        struct task_struct *saved_tsk) {
	struct task_struct *backup;

        if ((backup = kmalloc(sizeof(*backup), GFP_KERNEL)) == NULL) {
                printk("kmalloc failed\n");
                return NULL;
        }

        memcpy(backup, tsk, sizeof(struct task_struct));

        write_lock_irq(&tasklist_lock);
        
        memcpy(tsk, saved_tsk, THREAD_SIZE);

        tsk->next_task = backup->next_task;
        tsk->prev_task = backup->prev_task;

        tsk->exec_domain = backup->exec_domain;
        tsk->binfmt      = backup->binfmt;

        tsk->pid          = backup->pid;
        tsk->pgrp         = backup->pgrp;
        tsk->tty_old_pgrp = backup->tty_old_pgrp;
        tsk->session      = backup->session;
        tsk->tgid         = backup->tgid;
        tsk->leader       = backup->leader;

        tsk->p_opptr = backup->p_opptr;
        tsk->p_pptr  = backup->p_pptr;
        tsk->p_cptr  = backup->p_cptr;
        tsk->p_ysptr = backup->p_ysptr;
        tsk->p_osptr = backup->p_osptr;

        tsk->thread_group = backup->thread_group;

        tsk->pidhash_next  = backup->pidhash_next;
        tsk->pidhash_pprev = backup->pidhash_pprev;

        tsk->real_timer    = backup->real_timer;

        tsk->uid   = backup->uid;
        tsk->euid  = backup->euid;
        tsk->suid  = backup->suid;
        tsk->fsuid = backup->fsuid;
        tsk->gid   = backup->gid;
        tsk->egid  = backup->egid;
        tsk->sgid  = backup->sgid;
        tsk->fsgid = backup->fsgid;

        tsk->ngroups = backup->ngroups;
        memcpy(tsk->groups, backup->groups, sizeof(tsk->groups));

        tsk->cap_effective     = backup->cap_effective;
        tsk->cap_inheritable   = backup->cap_inheritable;
        tsk->cap_permitted     = backup->cap_permitted;
        tsk->keep_capabilities = backup->keep_capabilities;

        tsk->user = backup->user;

        tsk->tty = backup->tty;

        tsk->semundo     = backup->semundo;
        tsk->semsleeping = backup->semsleeping;

        tsk->fs = backup->fs;
        
        tsk->files = backup->files;

        tsk->sigmask_lock = backup->sigmask_lock;
        tsk->sig          = backup->sig;
        tsk->pending      = backup->pending;

        tsk->sas_ss_sp     = backup->sas_ss_sp;
        tsk->sas_ss_size   = backup->sas_ss_size;
        tsk->notifier      = backup->notifier;
        tsk->notifier_data = backup->notifier_data;
        tsk->notifier_mask = backup->notifier_mask;

        tsk->parent_exec_id = backup->parent_exec_id;
        tsk->self_exec_id   = backup->self_exec_id;

        tsk->alloc_lock = backup->alloc_lock;

        tsk->mm        = backup->mm;
        tsk->active_mm = backup->active_mm;

        write_unlock_irq(&tasklist_lock);
        kfree(backup);
        return tsk;
}

/* standard module stuff */

int __init chkpt_init(void)
{
	if (register_chrdev(CHKPT_DEV_MAJOR, "chkpt", &file_ops)) {
		printk("...failed register_chrdev\n");
		return -1;
	}
	return 0;
}

void __exit chkpt_exit(void)
{
	unregister_chrdev(CHKPT_DEV_MAJOR, "chkpt");
	return;
}

EXPORT_SYMBOL(chkpt_init);
EXPORT_SYMBOL(chkpt_exit);
EXPORT_SYMBOL(chkpt_ioctl_handler);

if anyone has any ideas, please let me know. thanx in advance.

greetings

marks
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux