Re: Add pacct_struct to fix some pacct bugs.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



    [PACCT] two phase process accounting

    The pacct facility need an i/o operation when an accounting record
    is generated. There is a possibility to wake OOM killer up.
    If OOM killer is activated, it kills some processes to make them
    release process memory regions.
    But acct_process() is called in the killed processes context
    before calling exit_mm(), so those processes cannot release
    own memory. In the results, any processes stop in this point and
    it finally cause a system stall.

    ---- in kernel/exit.c : do_exit() ------------
            group_dead = atomic_dec_and_test(&tsk->signal->live);
            if (group_dead) {
                    hrtimer_cancel(&tsk->signal->real_timer);
                    exit_itimers(tsk->signal);
                    acct_process(code);
            }
               :
           - snip -
               :
            exit_mm(tsk);
    ----------------------------------------------

    This patch separates generating an accounting record facility
    into two-phase.
    In the first one, acct_collect() calculate vitual memory size
    of the process and stores it into pacct_struct before exit_mm().
    Then, acct_process() generates an accounting record and write
    it into medium.

    Signed-off-by: KaiGai Kohei <[email protected]>

diff --git a/include/linux/acct.h b/include/linux/acct.h
index 9a66401..6753687 100644
--- a/include/linux/acct.h
+++ b/include/linux/acct.h
@@ -121,16 +121,20 @@ struct acct_v3
 #ifdef CONFIG_BSD_PROCESS_ACCT
 struct vfsmount;
 struct super_block;
 extern void acct_auto_close_mnt(struct vfsmount *m);
 extern void acct_auto_close(struct super_block *sb);
+extern void acct_init_pacct(struct pacct_struct *pacct);
+extern void acct_collect();
 extern void acct_process(long exitcode);
 extern void acct_update_integrals(struct task_struct *tsk);
 extern void acct_clear_integrals(struct task_struct *tsk);
 #else
 #define acct_auto_close_mnt(x)	do { } while (0)
 #define acct_auto_close(x)	do { } while (0)
+#define acct_init_pacct(x)	do { } while (0)
+#define acct_collect()		do { } while (0)
 #define acct_process(x)		do { } while (0)
 #define acct_update_integrals(x)		do { } while (0)
 #define acct_clear_integrals(task)	do { } while (0)
 #endif

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 29b7d4f..918fdda 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -356,10 +356,14 @@ struct sighand_struct {
 	atomic_t		count;
 	struct k_sigaction	action[_NSIG];
 	spinlock_t		siglock;
 };

+struct pacct_struct {
+	unsigned long		ac_mem;
+};
+
 /*
  * NOTE! "signal_struct" does not have it's own
  * locking, because a shared signal_struct always
  * implies a shared sighand_struct, so locking
  * sighand_struct is always a proper superset of
@@ -447,10 +451,13 @@ struct signal_struct {
 	 * thing in threads created with CLONE_THREAD */
 #ifdef CONFIG_KEYS
 	struct key *session_keyring;	/* keyring inherited over fork */
 	struct key *process_keyring;	/* keyring private to this process */
 #endif
+#ifdef CONFIG_BSD_PROCESS_ACCT
+	struct pacct_struct pacct;	/* per-process accounting information */
+#endif
 };

 /* Context switch must be unlocked if interrupts are to be enabled */
 #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
 # define __ARCH_WANT_UNLOCKED_CTXSW
diff --git a/kernel/acct.c b/kernel/acct.c
index b327f4d..f1a4e12 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -419,13 +419,13 @@ static u32 encode_float(u64 value)
 /*
  *  do_acct_process does all actual work. Caller holds the reference to file.
  */
 static void do_acct_process(long exitcode, struct file *file)
 {
+	struct pacct_struct *pacct = &current->signal->pacct;
 	acct_t ac;
 	mm_segment_t fs;
-	unsigned long vsize;
 	unsigned long flim;
 	u64 elapsed;
 	u64 run_time;
 	struct timespec uptime;
 	unsigned long jiffies;
@@ -503,24 +503,13 @@ static void do_acct_process(long exitcod
 		ac.ac_flag |= ASU;
 	if (current->flags & PF_DUMPCORE)
 		ac.ac_flag |= ACORE;
 	if (current->flags & PF_SIGNALED)
 		ac.ac_flag |= AXSIG;
-
-	vsize = 0;
-	if (current->mm) {
-		struct vm_area_struct *vma;
-		down_read(&current->mm->mmap_sem);
-		vma = current->mm->mmap;
-		while (vma) {
-			vsize += vma->vm_end - vma->vm_start;
-			vma = vma->vm_next;
-		}
-		up_read(&current->mm->mmap_sem);
-	}
-	vsize = vsize / 1024;
-	ac.ac_mem = encode_comp_t(vsize);
+	spin_lock(&current->sighand->siglock);
+	ac.ac_mem = encode_comp_t(pacct->ac_mem);
+	spin_unlock(&current->sighand->siglock);
 	ac.ac_io = encode_comp_t(0 /* current->io_usage */);	/* %% */
 	ac.ac_rw = encode_comp_t(ac.ac_io / 1024);
 	ac.ac_minflt = encode_comp_t(current->signal->min_flt +
 				     current->min_flt);
 	ac.ac_majflt = encode_comp_t(current->signal->maj_flt +
@@ -544,10 +533,42 @@ static void do_acct_process(long exitcod
 	current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
 	set_fs(fs);
 }

 /**
+ * acct_init_pacct - initialize a new pacct_struct
+ */
+void acct_init_pacct(struct pacct_struct *pacct)
+{
+	memset(pacct, 0, sizeof(struct pacct_struct));
+}
+
+/**
+ * acct_collect - collect accounting information into pacct_struct
+ */
+void acct_collect(void)
+{
+	struct pacct_struct *pacct = &current->signal->pacct;
+	unsigned long vsize = 0;
+
+	if (current->mm) {
+		struct vm_area_struct *vma;
+		down_read(&current->mm->mmap_sem);
+		vma = current->mm->mmap;
+		while (vma) {
+			vsize += vma->vm_end - vma->vm_start;
+			vma = vma->vm_next;
+		}
+		up_read(&current->mm->mmap_sem);
+	}
+
+	spin_lock(&current->sighand->siglock);
+	pacct->ac_mem = vsize / 1024;
+	spin_unlock(&current->sighand->siglock);
+}
+
+/**
  * acct_process - now just a wrapper around do_acct_process
  * @exitcode: task exit code
  *
  * handles process accounting for an exiting task
  */
diff --git a/kernel/exit.c b/kernel/exit.c
index e06d0c1..54bdbd9 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -893,11 +893,11 @@ fastcall NORET_TYPE void do_exit(long co
 	}
 	group_dead = atomic_dec_and_test(&tsk->signal->live);
 	if (group_dead) {
  		hrtimer_cancel(&tsk->signal->real_timer);
 		exit_itimers(tsk->signal);
-		acct_process(code);
+		acct_collect();
 	}
 	if (unlikely(tsk->robust_list))
 		exit_robust_list(tsk);
 #ifdef CONFIG_COMPAT
 	if (unlikely(tsk->compat_robust_list))
@@ -905,10 +905,12 @@ fastcall NORET_TYPE void do_exit(long co
 #endif
 	if (unlikely(tsk->audit_context))
 		audit_free(tsk);
 	exit_mm(tsk);

+	if (group_dead)
+		acct_process(code);
 	exit_sem(tsk);
 	__exit_files(tsk);
 	__exit_fs(tsk);
 	exit_namespace(tsk);
 	exit_thread();
diff --git a/kernel/fork.c b/kernel/fork.c
index ac8100e..d6c812c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -869,10 +869,11 @@ static inline int copy_signal(unsigned l
 		 * of the whole CPU time limit.
 		 */
 		tsk->it_prof_expires =
 			secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
 	}
+	acct_init_pacct(&sig->pacct);

 	return 0;
 }

 void __cleanup_signal(struct signal_struct *sig)


--
Open Source Software Promotion Center, NEC
KaiGai Kohei <[email protected]>
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux