From bb5b603036b99d80527b222dc98eba4ee8341020 Mon Sep 17 00:00:00 2001
From: Colin Cross <ccross@android.com>
Date: Tue, 12 Jul 2011 19:53:24 -0700
Subject: [PATCH 463/696] cgroup: Add generic cgroup subsystem permission checks

Rather than using explicit euid == 0 checks when trying to move
tasks into a cgroup via CFS, move permission checks into each
specific cgroup subsystem. If a subsystem does not specify a
'allow_attach' handler, then we fall back to doing our checks
the old way.

Use the 'allow_attach' handler for the 'cpu' cgroup to allow
non-root processes to add arbitrary processes to a 'cpu' cgroup
if it has the CAP_SYS_NICE capability set.

This version of the patch adds a 'allow_attach' handler instead
of reusing the 'can_attach' handler.  If the 'can_attach' handler
is reused, a new cgroup that implements 'can_attach' but not
the permission checks could end up with no permission checks
at all.

Change-Id: Icfa950aa9321d1ceba362061d32dc7dfa2c64f0c
Original-Author: San Mehat <san@google.com>
Signed-off-by: Colin Cross <ccross@android.com>
---
 Documentation/cgroups/cgroups.txt |    9 +++++++++
 include/linux/cgroup.h            |    1 +
 kernel/cgroup.c                   |   31 ++++++++++++++++++++++++++++---
 kernel/sched.c                    |   15 +++++++++++++++
 4 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index cd67e90..60d82e1 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -593,6 +593,15 @@ there are not tasks in the cgroup. If pre_destroy() returns error code,
 rmdir() will fail with it. From this behavior, pre_destroy() can be
 called multiple times against a cgroup.
 
+int allow_attach(struct cgroup *cgrp, struct task_struct *task)
+(cgroup_mutex held by caller)
+
+Called prior to moving a task into a cgroup; if the subsystem
+returns an error, this will abort the attach operation.  Used
+to extend the permission checks - if all subsystems in a cgroup
+return 0, the attach will be allowed to proceed, even if the
+default permission check (root or same user) fails.
+
 int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 	       struct task_struct *task)
 (cgroup_mutex held by caller)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 0c34e9c..543250e 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -459,6 +459,7 @@ struct cgroup_subsys {
 						  struct cgroup *cgrp);
 	int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
 	void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
+	int (*allow_attach)(struct cgroup *cgrp, struct task_struct *tsk);
 	int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
 			  struct task_struct *tsk);
 	int (*can_attach_task)(struct cgroup *cgrp, struct task_struct *tsk);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f6e7b4a..241b74a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2199,6 +2199,24 @@ out_free_group_list:
 	return retval;
 }
 
+static int cgroup_allow_attach(struct cgroup *cgrp, struct task_struct *tsk)
+{
+	struct cgroup_subsys *ss;
+	int ret;
+
+	for_each_subsys(cgrp->root, ss) {
+		if (ss->allow_attach) {
+			ret = ss->allow_attach(cgrp, tsk);
+			if (ret)
+				return ret;
+		} else {
+			return -EACCES;
+		}
+	}
+
+	return 0;
+}
+
 /*
  * Find the task_struct of the task to attach by vpid and pass it along to the
  * function to attach either it or all tasks in its threadgroup. Will take
@@ -2244,9 +2262,16 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
 		if (cred->euid &&
 		    cred->euid != tcred->uid &&
 		    cred->euid != tcred->suid) {
-			rcu_read_unlock();
-			cgroup_unlock();
-			return -EACCES;
+			/*
+			 * if the default permission check fails, give each
+			 * cgroup a chance to extend the permission check
+			 */
+			ret = cgroup_allow_attach(cgrp, tsk);
+			if (ret) {
+				rcu_read_unlock();
+				cgroup_unlock();
+				return ret;
+			}
 		}
 		get_task_struct(tsk);
 		rcu_read_unlock();
diff --git a/kernel/sched.c b/kernel/sched.c
index db2b408..bdfcce7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -8799,6 +8799,20 @@ cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
 }
 
 static int
+cpu_cgroup_allow_attach(struct cgroup *cgrp, struct task_struct *tsk)
+{
+	const struct cred *cred = current_cred(), *tcred;
+
+	tcred = __task_cred(tsk);
+
+	if ((current != tsk) && !capable(CAP_SYS_NICE) &&
+	    cred->euid != tcred->uid && cred->euid != tcred->suid)
+		return -EACCES;
+
+	return 0;
+}
+
+static int
 cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -8903,6 +8917,7 @@ struct cgroup_subsys cpu_cgroup_subsys = {
 	.name		= "cpu",
 	.create		= cpu_cgroup_create,
 	.destroy	= cpu_cgroup_destroy,
+	.allow_attach	= cpu_cgroup_allow_attach,
 	.can_attach_task = cpu_cgroup_can_attach_task,
 	.attach_task	= cpu_cgroup_attach_task,
 	.exit		= cpu_cgroup_exit,
-- 
1.7.1


