From d727abcb2355566a3372ee1810f156fba75112b7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 14 Jun 2012 02:16:42 -0700 Subject: netns: Deduplicate and fix copy_net_ns when !CONFIG_NET_NS The copy of copy_net_ns used when the network stack is not built is broken as it does not return -EINVAL when attempting to create a new network namespace. We don't even have a previous network namespace. Since we need a copy of copy_net_ns in net/net_namespace.h that is available when the networking stack is not built at all move the correct version of copy_net_ns from net_namespace.c into net_namespace.h Leaving us with just 2 versions of copy_net_ns. One version for when we compile in network namespace suport and another stub for all other occasions. Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- include/net/net_namespace.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 95e646641184..32dcb6085ebe 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -126,16 +126,19 @@ struct net { /* Init's network namespace */ extern struct net init_net; -#ifdef CONFIG_NET +#ifdef CONFIG_NET_NS extern struct net *copy_net_ns(unsigned long flags, struct net *net_ns); -#else /* CONFIG_NET */ -static inline struct net *copy_net_ns(unsigned long flags, struct net *net_ns) +#else /* CONFIG_NET_NS */ +#include +#include +static inline struct net *copy_net_ns(unsigned long flags, struct net *old_net) { - /* There is nothing to copy so this is a noop */ - return net_ns; + if (flags & CLONE_NEWNET) + return ERR_PTR(-EINVAL); + return old_net; } -#endif /* CONFIG_NET */ +#endif /* CONFIG_NET_NS */ extern struct list_head net_namespace_list; -- cgit v1.2.3 From 038e7332b8d4c0629a2965e3ede1a92e8e427bd6 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 14 Jun 2012 02:31:10 -0700 Subject: userns: make each net (net_ns) belong to a user_ns The user namespace which creates a new network namespace owns that namespace and all resources created in it. This way we can target capability checks for privileged operations against network resources to the user_ns which created the network namespace in which the resource lives. Privilege to the user namespace which owns the network namespace, or any parent user namespace thereof, provides the same privilege to the network resource. This patch is reworked from a version originally by Serge E. Hallyn Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- include/net/net_namespace.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 32dcb6085ebe..c5a43f56b796 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -23,6 +23,7 @@ #endif #include +struct user_namespace; struct proc_dir_entry; struct net_device; struct sock; @@ -53,6 +54,8 @@ struct net { struct list_head cleanup_list; /* namespaces on death row */ struct list_head exit_list; /* Use only net_mutex */ + struct user_namespace *user_ns; /* Owning user namespace */ + struct proc_dir_entry *proc_net; struct proc_dir_entry *proc_net_stat; @@ -127,12 +130,14 @@ struct net { extern struct net init_net; #ifdef CONFIG_NET_NS -extern struct net *copy_net_ns(unsigned long flags, struct net *net_ns); +extern struct net *copy_net_ns(unsigned long flags, + struct user_namespace *user_ns, struct net *old_net); #else /* CONFIG_NET_NS */ #include #include -static inline struct net *copy_net_ns(unsigned long flags, struct net *old_net) +static inline struct net *copy_net_ns(unsigned long flags, + struct user_namespace *user_ns, struct net *old_net) { if (flags & CLONE_NEWNET) return ERR_PTR(-EINVAL); -- cgit v1.2.3 From 49f4d8b93ccf9454284b6f524b96c66d8d7fbccc Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 2 Aug 2012 04:25:10 -0700 Subject: pidns: Capture the user namespace and filter ns_last_pid - Capture the the user namespace that creates the pid namespace - Use that user namespace to test if it is ok to write to /proc/sys/kernel/ns_last_pid. Zhao Hongjiang noticed I was missing a put_user_ns in when destroying a pid_ns. I have foloded his patch into this one so that bisects will work properly. Acked-by: Serge Hallyn Signed-off-by: "Eric W. Biederman" --- include/linux/pid_namespace.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 65e3e87eacc5..c89c9cfcd247 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -31,6 +31,7 @@ struct pid_namespace { #ifdef CONFIG_BSD_PROCESS_ACCT struct bsd_acct_struct *bacct; #endif + struct user_namespace *user_ns; kgid_t pid_gid; int hide_pid; int reboot; /* group exit code if this pidns was rebooted */ @@ -46,7 +47,8 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) return ns; } -extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns); +extern struct pid_namespace *copy_pid_ns(unsigned long flags, + struct user_namespace *user_ns, struct pid_namespace *ns); extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd); extern void put_pid_ns(struct pid_namespace *ns); @@ -59,8 +61,8 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) return ns; } -static inline struct pid_namespace * -copy_pid_ns(unsigned long flags, struct pid_namespace *ns) +static inline struct pid_namespace *copy_pid_ns(unsigned long flags, + struct user_namespace *user_ns, struct pid_namespace *ns) { if (flags & CLONE_NEWPID) ns = ERR_PTR(-EINVAL); -- cgit v1.2.3 From 0a01f2cc390e10633a54f72c608cc3fe19a50c3d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 1 Aug 2012 10:33:47 -0700 Subject: pidns: Make the pidns proc mount/umount logic obvious. Track the number of pids in the proc hash table. When the number of pids goes to 0 schedule work to unmount the kernel mount of proc. Move the mount of proc into alloc_pid when we allocate the pid for init. Remove the surprising calls of pid_ns_release proc in fork and proc_flush_task. Those code paths really shouldn't know about proc namespace implementation details and people have demonstrated several times that finding and understanding those code paths is difficult and non-obvious. Because of the call path detach pid is alwasy called with the rtnl_lock held free_pid is not allowed to sleep, so the work to unmounting proc is moved to a work queue. This has the side benefit of not blocking the entire world waiting for the unnecessary rcu_barrier in deactivate_locked_super. In the process of making the code clear and obvious this fixes a bug reported by Gao feng where we would leak a mount of proc during clone(CLONE_NEWPID|CLONE_NEWNET) if copy_pid_ns succeeded and copy_net_ns failed. Acked-by: "Serge E. Hallyn" Signed-off-by: "Eric W. Biederman" --- include/linux/pid_namespace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index c89c9cfcd247..4c96acdb2489 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -21,6 +21,7 @@ struct pid_namespace { struct kref kref; struct pidmap pidmap[PIDMAP_ENTRIES]; int last_pid; + int nr_hashed; struct task_struct *child_reaper; struct kmem_cache *pid_cachep; unsigned int level; @@ -32,6 +33,7 @@ struct pid_namespace { struct bsd_acct_struct *bacct; #endif struct user_namespace *user_ns; + struct work_struct proc_work; kgid_t pid_gid; int hide_pid; int reboot; /* group exit code if this pidns was rebooted */ -- cgit v1.2.3 From 57e8391d327609cbf12d843259c968b9e5c1838f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 7 Mar 2010 18:17:03 -0800 Subject: pidns: Add setns support - Pid namespaces are designed to be inescapable so verify that the passed in pid namespace is a child of the currently active pid namespace or the currently active pid namespace itself. Allowing the currently active pid namespace is important so the effects of an earlier setns can be cancelled. Signed-off-by: Eric W. Biederman --- include/linux/proc_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 3fd2e871ff1b..acaafcd40aa5 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -251,6 +251,7 @@ struct proc_ns_operations { extern const struct proc_ns_operations netns_operations; extern const struct proc_ns_operations utsns_operations; extern const struct proc_ns_operations ipcns_operations; +extern const struct proc_ns_operations pidns_operations; union proc_op { int (*proc_get_link)(struct dentry *, struct path *); -- cgit v1.2.3 From 8823c079ba7136dc1948d6f6dcb5f8022bde438e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 7 Mar 2010 18:49:36 -0800 Subject: vfs: Add setns support for the mount namespace setns support for the mount namespace is a little tricky as an arbitrary decision must be made about what to set fs->root and fs->pwd to, as there is no expectation of a relationship between the two mount namespaces. Therefore I arbitrarily find the root mount point, and follow every mount on top of it to find the top of the mount stack. Then I set fs->root and fs->pwd to that location. The topmost root of the mount stack seems like a reasonable place to be. Bind mount support for the mount namespace inodes has the possibility of creating circular dependencies between mount namespaces. Circular dependencies can result in loops that prevent mount namespaces from every being freed. I avoid creating those circular dependencies by adding a sequence number to the mount namespace and require all bind mounts be of a younger mount namespace into an older mount namespace. Add a helper function proc_ns_inode so it is possible to detect when we are attempting to bind mound a namespace inode. Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- include/linux/proc_fs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index acaafcd40aa5..9014c041e752 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -174,6 +174,7 @@ extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, struct proc_dir_entry *parent); extern struct file *proc_ns_fget(int fd); +extern bool proc_ns_inode(struct inode *inode); #else @@ -229,6 +230,11 @@ static inline struct file *proc_ns_fget(int fd) return ERR_PTR(-EINVAL); } +static inline bool proc_ns_inode(struct inode *inode) +{ + return false; +} + #endif /* CONFIG_PROC_FS */ #if !defined(CONFIG_PROC_KCORE) @@ -252,6 +258,7 @@ extern const struct proc_ns_operations netns_operations; extern const struct proc_ns_operations utsns_operations; extern const struct proc_ns_operations ipcns_operations; extern const struct proc_ns_operations pidns_operations; +extern const struct proc_ns_operations mntns_operations; union proc_op { int (*proc_get_link)(struct dentry *, struct path *); -- cgit v1.2.3 From 771b1371686e0a63e938ada28de020b9a0040f55 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 26 Jul 2012 21:08:32 -0700 Subject: vfs: Add a user namespace reference from struct mnt_namespace This will allow for support for unprivileged mounts in a new user namespace. Acked-by: "Serge E. Hallyn" Signed-off-by: "Eric W. Biederman" --- include/linux/mnt_namespace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 5a8e3903d770..12b2ab510323 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -4,9 +4,10 @@ struct mnt_namespace; struct fs_struct; +struct user_namespace; extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, - struct fs_struct *); + struct user_namespace *, struct fs_struct *); extern void put_mnt_ns(struct mnt_namespace *ns); extern const struct file_operations proc_mounts_operations; -- cgit v1.2.3 From 0c55cfc4166d9a0f38de779bd4d75a90afbe7734 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 26 Jul 2012 21:42:03 -0700 Subject: vfs: Allow unprivileged manipulation of the mount namespace. - Add a filesystem flag to mark filesystems that are safe to mount as an unprivileged user. - Add a filesystem flag to mark filesystems that don't need MNT_NODEV when mounted by an unprivileged user. - Relax the permission checks to allow unprivileged users that have CAP_SYS_ADMIN permissions in the user namespace referred to by the current mount namespace to be allowed to mount, unmount, and move filesystems. Acked-by: "Serge E. Hallyn" Signed-off-by: "Eric W. Biederman" --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index b33cfc97b9ca..5037aa6817fd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1812,6 +1812,8 @@ struct file_system_type { #define FS_REQUIRES_DEV 1 #define FS_BINARY_MOUNTDATA 2 #define FS_HAS_SUBTYPE 4 +#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ +#define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */ #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ struct dentry *(*mount) (struct file_system_type *, int, -- cgit v1.2.3 From bcf58e725ddc45d31addbc6627d4f0edccc824c1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 26 Jul 2012 04:02:49 -0700 Subject: userns: Make create_new_namespaces take a user_ns parameter Modify create_new_namespaces to explicitly take a user namespace parameter, instead of implicitly through the task_struct. This allows an implementation of unshare(CLONE_NEWUSER) where the new user namespace is not stored onto the current task_struct until after all of the namespaces are created. Acked-by: Serge Hallyn Signed-off-by: "Eric W. Biederman" --- include/linux/ipc_namespace.h | 7 ++++--- include/linux/utsname.h | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 5499c92a9153..f03af702a39d 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -133,7 +133,8 @@ static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; } #if defined(CONFIG_IPC_NS) extern struct ipc_namespace *copy_ipcs(unsigned long flags, - struct task_struct *tsk); + struct user_namespace *user_ns, struct ipc_namespace *ns); + static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) { if (ns) @@ -144,12 +145,12 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) extern void put_ipc_ns(struct ipc_namespace *ns); #else static inline struct ipc_namespace *copy_ipcs(unsigned long flags, - struct task_struct *tsk) + struct user_namespace *user_ns, struct ipc_namespace *ns) { if (flags & CLONE_NEWIPC) return ERR_PTR(-EINVAL); - return tsk->nsproxy->ipc_ns; + return ns; } static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 2b345206722a..221f4a0a7502 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -33,7 +33,7 @@ static inline void get_uts_ns(struct uts_namespace *ns) } extern struct uts_namespace *copy_utsname(unsigned long flags, - struct task_struct *tsk); + struct user_namespace *user_ns, struct uts_namespace *old_ns); extern void free_uts_ns(struct kref *kref); static inline void put_uts_ns(struct uts_namespace *ns) @@ -50,12 +50,12 @@ static inline void put_uts_ns(struct uts_namespace *ns) } static inline struct uts_namespace *copy_utsname(unsigned long flags, - struct task_struct *tsk) + struct user_namespace *user_ns, struct uts_namespace *old_ns) { if (flags & CLONE_NEWUTS) return ERR_PTR(-EINVAL); - return tsk->nsproxy->uts_ns; + return old_ns; } #endif -- cgit v1.2.3 From 4c44aaafa8108f584831850ab48a975e971db2de Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 26 Jul 2012 05:05:21 -0700 Subject: userns: Kill task_user_ns The task_user_ns function hides the fact that it is getting the user namespace from struct cred on the task. struct cred may go away as soon as the rcu lock is released. This leads to a race where we can dereference a stale user namespace pointer. To make it obvious a struct cred is involved kill task_user_ns. To kill the race modify the users of task_user_ns to only reference the user namespace while the rcu lock is held. Cc: Kees Cook Cc: James Morris Acked-by: Kees Cook Acked-by: Serge Hallyn Signed-off-by: "Eric W. Biederman" --- include/linux/cred.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/cred.h b/include/linux/cred.h index ebbed2ce6637..856d2622d832 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -357,10 +357,8 @@ static inline void put_cred(const struct cred *_cred) extern struct user_namespace init_user_ns; #ifdef CONFIG_USER_NS #define current_user_ns() (current_cred_xxx(user_ns)) -#define task_user_ns(task) (task_cred_xxx((task), user_ns)) #else #define current_user_ns() (&init_user_ns) -#define task_user_ns(task) (&init_user_ns) #endif -- cgit v1.2.3 From cde1975bc242f3e1072bde623ef378e547b73f91 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 26 Jul 2012 06:24:06 -0700 Subject: userns: Implent proc namespace operations This allows entering a user namespace, and the ability to store a reference to a user namespace with a bind mount. Addition of missing userns_ns_put in userns_install from Gao feng Acked-by: Serge Hallyn Signed-off-by: "Eric W. Biederman" --- include/linux/proc_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 9014c041e752..31447819bc55 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -258,6 +258,7 @@ extern const struct proc_ns_operations netns_operations; extern const struct proc_ns_operations utsns_operations; extern const struct proc_ns_operations ipcns_operations; extern const struct proc_ns_operations pidns_operations; +extern const struct proc_ns_operations userns_operations; extern const struct proc_ns_operations mntns_operations; union proc_op { -- cgit v1.2.3 From b2e0d98705e60e45bbb3c0032c48824ad7ae0704 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 26 Jul 2012 05:15:35 -0700 Subject: userns: Implement unshare of the user namespace - Add CLONE_THREAD to the unshare flags if CLONE_NEWUSER is selected As changing user namespaces is only valid if all there is only a single thread. - Restore the code to add CLONE_VM if CLONE_THREAD is selected and the code to addCLONE_SIGHAND if CLONE_VM is selected. Making the constraints in the code clear. Acked-by: Serge Hallyn Signed-off-by: "Eric W. Biederman" --- include/linux/nsproxy.h | 2 +- include/linux/user_namespace.h | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index cc37a55ad004..10e5947491c7 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -67,7 +67,7 @@ void exit_task_namespaces(struct task_struct *tsk); void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new); void free_nsproxy(struct nsproxy *ns); int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **, - struct fs_struct *); + struct cred *, struct fs_struct *); int __init nsproxy_cache_init(void); static inline void put_nsproxy(struct nsproxy *ns) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 95142cae446a..17651f08d67f 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -39,6 +39,7 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns) } extern int create_user_ns(struct cred *new); +extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred); extern void free_user_ns(struct kref *kref); static inline void put_user_ns(struct user_namespace *ns) @@ -66,6 +67,14 @@ static inline int create_user_ns(struct cred *new) return -EINVAL; } +static inline int unshare_userns(unsigned long unshare_flags, + struct cred **new_cred) +{ + if (unshare_flags & CLONE_NEWUSER) + return -EINVAL; + return 0; +} + static inline void put_user_ns(struct user_namespace *ns) { } -- cgit v1.2.3 From 33d6dce607573b5fd7a43168e0d91221b3ca532b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 17 Jun 2011 13:33:20 -0700 Subject: proc: Generalize proc inode allocation Generalize the proc inode allocation so that it can be used without having to having to create a proc_dir_entry. This will allow namespace file descriptors to remain light weight entitities but still have the same inode number when the backing namespace is the same. Acked-by: Serge E. Hallyn Signed-off-by: Eric W. Biederman --- include/linux/proc_fs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 31447819bc55..bf1d000fbba6 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -176,6 +176,8 @@ extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, extern struct file *proc_ns_fget(int fd); extern bool proc_ns_inode(struct inode *inode); +extern int proc_alloc_inum(unsigned int *pino); +extern void proc_free_inum(unsigned int inum); #else #define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; }) @@ -235,6 +237,14 @@ static inline bool proc_ns_inode(struct inode *inode) return false; } +static inline int proc_alloc_inum(unsigned int *inum) +{ + *inum = 1; + return 0; +} +static inline void proc_free_inum(unsigned int inum) +{ +} #endif /* CONFIG_PROC_FS */ #if !defined(CONFIG_PROC_KCORE) -- cgit v1.2.3 From 98f842e675f96ffac96e6c50315790912b2812be Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 15 Jun 2011 10:21:48 -0700 Subject: proc: Usable inode numbers for the namespace file descriptors. Assign a unique proc inode to each namespace, and use that inode number to ensure we only allocate at most one proc inode for every namespace in proc. A single proc inode per namespace allows userspace to test to see if two processes are in the same namespace. This has been a long requested feature and only blocked because a naive implementation would put the id in a global space and would ultimately require having a namespace for the names of namespaces, making migration and certain virtualization tricks impossible. We still don't have per superblock inode numbers for proc, which appears necessary for application unaware checkpoint/restart and migrations (if the application is using namespace file descriptors) but that is now allowd by the design if it becomes important. I have preallocated the ipc and uts initial proc inode numbers so their structures can be statically initialized. Signed-off-by: Eric W. Biederman --- include/linux/ipc_namespace.h | 2 ++ include/linux/pid_namespace.h | 1 + include/linux/proc_fs.h | 7 ++++++- include/linux/user_namespace.h | 1 + include/linux/utsname.h | 1 + include/net/net_namespace.h | 2 ++ 6 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index f03af702a39d..fe771978e877 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -67,6 +67,8 @@ struct ipc_namespace { /* user_ns which owns the ipc ns */ struct user_namespace *user_ns; + + unsigned int proc_inum; }; extern struct ipc_namespace init_ipc_ns; diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 4c96acdb2489..bf285999273a 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -37,6 +37,7 @@ struct pid_namespace { kgid_t pid_gid; int hide_pid; int reboot; /* group exit code if this pidns was rebooted */ + unsigned int proc_inum; }; extern struct pid_namespace init_pid_ns; diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index bf1d000fbba6..2e24018b7cec 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -28,7 +28,11 @@ struct mm_struct; */ enum { - PROC_ROOT_INO = 1, + PROC_ROOT_INO = 1, + PROC_IPC_INIT_INO = 0xEFFFFFFFU, + PROC_UTS_INIT_INO = 0xEFFFFFFEU, + PROC_USER_INIT_INO = 0xEFFFFFFDU, + PROC_PID_INIT_INO = 0xEFFFFFFCU, }; /* @@ -263,6 +267,7 @@ struct proc_ns_operations { void *(*get)(struct task_struct *task); void (*put)(void *ns); int (*install)(struct nsproxy *nsproxy, void *ns); + unsigned int (*inum)(void *ns); }; extern const struct proc_ns_operations netns_operations; extern const struct proc_ns_operations utsns_operations; diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 17651f08d67f..b9bd2e6c73cc 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -25,6 +25,7 @@ struct user_namespace { struct user_namespace *parent; kuid_t owner; kgid_t group; + unsigned int proc_inum; }; extern struct user_namespace init_user_ns; diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 221f4a0a7502..239e27733d6c 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -23,6 +23,7 @@ struct uts_namespace { struct kref kref; struct new_utsname name; struct user_namespace *user_ns; + unsigned int proc_inum; }; extern struct uts_namespace init_uts_ns; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index c5a43f56b796..de644bcd8613 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -56,6 +56,8 @@ struct net { struct user_namespace *user_ns; /* Owning user namespace */ + unsigned int proc_inum; + struct proc_dir_entry *proc_net; struct proc_dir_entry *proc_net_stat; -- cgit v1.2.3