~hc/RK356X_SDK_RELEASE.git

..	..	@@ -39,7 +39,6 @@
39	39	#include <linux/bitops.h>
40	40	#include <linux/init_task.h>
41	41	#include <linux/uaccess.h>
42		-#include <linux/build_bug.h>
43	42
44	43	#include "internal.h"
45	44	#include "mount.h"
..	..	@@ -134,7 +133,6 @@
134	133	struct filename *result;
135	134	char *kname;
136	135	int len;
137		- BUILD_BUG_ON(offsetof(struct filename, iname) % sizeof(long) != 0);
138	136
139	137	result = audit_reusename(filename);
140	138	if (result)
..	..	@@ -276,7 +274,7 @@
276	274	/* no ->get_acl() calls in RCU mode... */
277	275	if (is_uncached_acl(acl))
278	276	return -ECHILD;
279		- return posix_acl_permission(inode, acl, mask & ~MAY_NOT_BLOCK);
	277	+ return posix_acl_permission(inode, acl, mask);
280	278	}
281	279
282	280	acl = get_acl(inode, ACL_TYPE_ACCESS);
..	..	@@ -293,37 +291,51 @@
293	291	}
294	292
295	293	/*
296		- * This does the basic permission checking
	294	+ * This does the basic UNIX permission checking.
	295	+ *
	296	+ * Note that the POSIX ACL check cares about the MAY_NOT_BLOCK bit,
	297	+ * for RCU walking.
297	298	*/
298	299	static int acl_permission_check(struct inode *inode, int mask)
299	300	{
300	301	unsigned int mode = inode->i_mode;
301	302
302		- if (likely(uid_eq(current_fsuid(), inode->i_uid)))
	303	+ /* Are we the owner? If so, ACL's don't matter */
	304	+ if (likely(uid_eq(current_fsuid(), inode->i_uid))) {
	305	+ mask &= 7;
303	306	mode >>= 6;
304		- else {
305		- if (IS_POSIXACL(inode) && (mode & S_IRWXG)) {
306		- int error = check_acl(inode, mask);
307		- if (error != -EAGAIN)
308		- return error;
309		- }
	307	+ return (mask & ~mode) ? -EACCES : 0;
	308	+ }
310	309
	310	+ /* Do we have ACL's? */
	311	+ if (IS_POSIXACL(inode) && (mode & S_IRWXG)) {
	312	+ int error = check_acl(inode, mask);
	313	+ if (error != -EAGAIN)
	314	+ return error;
	315	+ }
	316	+
	317	+ /* Only RWX matters for group/other mode bits */
	318	+ mask &= 7;
	319	+
	320	+ /*
	321	+ * Are the group permissions different from
	322	+ * the other permissions in the bits we care
	323	+ * about? Need to check group ownership if so.
	324	+ */
	325	+ if (mask & (mode ^ (mode >> 3))) {
311	326	if (in_group_p(inode->i_gid))
312	327	mode >>= 3;
313	328	}
314	329
315		- /*
316		- * If the DACs are ok we don't need any capability check.
317		- */
318		- if ((mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
319		- return 0;
320		- return -EACCES;
	330	+ /* Bits in 'mode' clear that we require? */
	331	+ return (mask & ~mode) ? -EACCES : 0;
321	332	}
322	333
323	334	/**
324	335	* generic_permission - check for access rights on a Posix-like filesystem
325	336	* @inode: inode to check access rights for
326		- * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, ...)
	337	+ * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC,
	338	+ * %MAY_NOT_BLOCK ...)
327	339	*
328	340	* Used to check for read/write/execute permissions on a file.
329	341	* We use "fsuid" for this, letting us set arbitrary permissions
..	..	@@ -382,11 +394,9 @@
382	394	* flag in inode->i_opflags, that says "this has not special
383	395	* permission function, use the fast case".
384	396	*/
385		-static inline int do_inode_permission(struct vfsmount *mnt, struct inode *inode, int mask)
	397	+static inline int do_inode_permission(struct inode *inode, int mask)
386	398	{
387	399	if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) {
388		- if (likely(mnt && inode->i_op->permission2))
389		- return inode->i_op->permission2(mnt, inode, mask);
390	400	if (likely(inode->i_op->permission))
391	401	return inode->i_op->permission(inode, mask);
392	402
..	..	@@ -419,8 +429,7 @@
419	429	}
420	430
421	431	/**
422		- * inode_permission2 - Check for access rights to a given inode
423		- * @mnt:
	432	+ * inode_permission - Check for access rights to a given inode
424	433	* @inode: Inode to check permission on
425	434	* @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
426	435	*
..	..	@@ -430,7 +439,7 @@
430	439	*
431	440	* When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
432	441	*/
433		-int inode_permission2(struct vfsmount *mnt, struct inode *inode, int mask)
	442	+int inode_permission(struct inode *inode, int mask)
434	443	{
435	444	int retval;
436	445
..	..	@@ -454,7 +463,7 @@
454	463	return -EACCES;
455	464	}
456	465
457		- retval = do_inode_permission(mnt, inode, mask);
	466	+ retval = do_inode_permission(inode, mask);
458	467	if (retval)
459	468	return retval;
460	469
..	..	@@ -462,14 +471,7 @@
462	471	if (retval)
463	472	return retval;
464	473
465		- retval = security_inode_permission(inode, mask);
466		- return retval;
467		-}
468		-EXPORT_SYMBOL(inode_permission2);
469		-
470		-int inode_permission(struct inode *inode, int mask)
471		-{
472		- return inode_permission2(NULL, inode, mask);
	474	+ return security_inode_permission(inode, mask);
473	475	}
474	476	EXPORT_SYMBOL(inode_permission);
475	477
..	..	@@ -506,7 +508,7 @@
506	508	struct path root;
507	509	struct inode *inode; /* path.dentry.d_inode */
508	510	unsigned int flags;
509		- unsigned seq, m_seq;
	511	+ unsigned seq, m_seq, r_seq;
510	512	int last_type;
511	513	unsigned depth;
512	514	int total_link_count;
..	..	@@ -518,9 +520,10 @@
518	520	} *stack, internal[EMBEDDED_LEVELS];
519	521	struct filename *name;
520	522	struct nameidata *saved;
521		- struct inode *link_inode;
522	523	unsigned root_seq;
523	524	int dfd;
	525	+ kuid_t dir_uid;
	526	+ umode_t dir_mode;
524	527	} __randomize_layout;
525	528
526	529	static void set_nameidata(struct nameidata *p, int dfd, struct filename *name)
..	..	@@ -529,6 +532,8 @@
529	532	p->stack = p->internal;
530	533	p->dfd = dfd;
531	534	p->name = name;
	535	+ p->path.mnt = NULL;
	536	+ p->path.dentry = NULL;
532	537	p->total_link_count = old ? old->total_link_count : 0;
533	538	p->saved = old;
534	539	current->nameidata = p;
..	..	@@ -545,52 +550,34 @@
545	550	kfree(now->stack);
546	551	}
547	552
548		-static int __nd_alloc_stack(struct nameidata *nd)
	553	+static bool nd_alloc_stack(struct nameidata *nd)
549	554	{
550	555	struct saved *p;
551	556
552		- if (nd->flags & LOOKUP_RCU) {
553		- p= kmalloc_array(MAXSYMLINKS, sizeof(struct saved),
554		- GFP_ATOMIC);
555		- if (unlikely(!p))
556		- return -ECHILD;
557		- } else {
558		- p= kmalloc_array(MAXSYMLINKS, sizeof(struct saved),
559		- GFP_KERNEL);
560		- if (unlikely(!p))
561		- return -ENOMEM;
562		- }
	557	+ p= kmalloc_array(MAXSYMLINKS, sizeof(struct saved),
	558	+ nd->flags & LOOKUP_RCU ? GFP_ATOMIC : GFP_KERNEL);
	559	+ if (unlikely(!p))
	560	+ return false;
563	561	memcpy(p, nd->internal, sizeof(nd->internal));
564	562	nd->stack = p;
565		- return 0;
	563	+ return true;
566	564	}
567	565
568	566	/**
569		- * path_connected - Verify that a path->dentry is below path->mnt.mnt_root
570		- * @path: nameidate to verify
	567	+ * path_connected - Verify that a dentry is below mnt.mnt_root
571	568	*
572	569	* Rename can sometimes move a file or directory outside of a bind
573	570	* mount, path_connected allows those cases to be detected.
574	571	*/
575		-static bool path_connected(const struct path *path)
	572	+static bool path_connected(struct vfsmount *mnt, struct dentry *dentry)
576	573	{
577		- struct vfsmount *mnt = path->mnt;
578	574	struct super_block *sb = mnt->mnt_sb;
579	575
580		- /* Bind mounts and multi-root filesystems can have disconnected paths */
581		- if (!(sb->s_iflags & SB_I_MULTIROOT) && (mnt->mnt_root == sb->s_root))
	576	+ /* Bind mounts can have disconnected paths */
	577	+ if (mnt->mnt_root == sb->s_root)
582	578	return true;
583	579
584		- return is_subdir(path->dentry, mnt->mnt_root);
585		-}
586		-
587		-static inline int nd_alloc_stack(struct nameidata *nd)
588		-{
589		- if (likely(nd->depth != EMBEDDED_LEVELS))
590		- return 0;
591		- if (likely(nd->stack != nd->internal))
592		- return 0;
593		- return __nd_alloc_stack(nd);
	580	+ return is_subdir(dentry, mnt->mnt_root);
594	581	}
595	582
596	583	static void drop_links(struct nameidata *nd)
..	..	@@ -611,24 +598,23 @@
611	598	path_put(&nd->path);
612	599	for (i = 0; i < nd->depth; i++)
613	600	path_put(&nd->stack[i].link);
614		- if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
	601	+ if (nd->flags & LOOKUP_ROOT_GRABBED) {
615	602	path_put(&nd->root);
616		- nd->root.mnt = NULL;
	603	+ nd->flags &= ~LOOKUP_ROOT_GRABBED;
617	604	}
618	605	} else {
619	606	nd->flags &= ~LOOKUP_RCU;
620		- if (!(nd->flags & LOOKUP_ROOT))
621		- nd->root.mnt = NULL;
622	607	rcu_read_unlock();
623	608	}
624	609	nd->depth = 0;
	610	+ nd->path.mnt = NULL;
	611	+ nd->path.dentry = NULL;
625	612	}
626	613
627	614	/* path_put is needed afterwards regardless of success or failure */
628		-static bool legitimize_path(struct nameidata *nd,
629		- struct path *path, unsigned seq)
	615	+static bool __legitimize_path(struct path *path, unsigned seq, unsigned mseq)
630	616	{
631		- int res = __legitimize_mnt(path->mnt, nd->m_seq);
	617	+ int res = __legitimize_mnt(path->mnt, mseq);
632	618	if (unlikely(res)) {
633	619	if (res > 0)
634	620	path->mnt = NULL;
..	..	@@ -642,9 +628,20 @@
642	628	return !read_seqcount_retry(&path->dentry->d_seq, seq);
643	629	}
644	630
	631	+static inline bool legitimize_path(struct nameidata *nd,
	632	+ struct path *path, unsigned seq)
	633	+{
	634	+ return __legitimize_path(path, seq, nd->m_seq);
	635	+}
	636	+
645	637	static bool legitimize_links(struct nameidata *nd)
646	638	{
647	639	int i;
	640	+ if (unlikely(nd->flags & LOOKUP_CACHED)) {
	641	+ drop_links(nd);
	642	+ nd->depth = 0;
	643	+ return false;
	644	+ }
648	645	for (i = 0; i < nd->depth; i++) {
649	646	struct saved *last = nd->stack + i;
650	647	if (unlikely(!legitimize_path(nd, &last->link, last->seq))) {
..	..	@@ -654,6 +651,22 @@
654	651	}
655	652	}
656	653	return true;
	654	+}
	655	+
	656	+static bool legitimize_root(struct nameidata *nd)
	657	+{
	658	+ /*
	659	+ * For scoped-lookups (where nd->root has been zeroed), we need to
	660	+ * restart the whole lookup from scratch -- because set_root() is wrong
	661	+ * for these lookups (nd->dfd is the root, not the filesystem root).
	662	+ */
	663	+ if (!nd->root.mnt && (nd->flags & LOOKUP_IS_SCOPED))
	664	+ return false;
	665	+ /* Nothing to do if nd->root is zero or is managed by the VFS user. */
	666	+ if (!nd->root.mnt || (nd->flags & LOOKUP_ROOT))
	667	+ return true;
	668	+ nd->flags |= LOOKUP_ROOT_GRABBED;
	669	+ return legitimize_path(nd, &nd->root, nd->root_seq);
657	670	}
658	671
659	672	/*
..	..	@@ -668,17 +681,17 @@
668	681	*/
669	682
670	683	/**
671		- * unlazy_walk - try to switch to ref-walk mode.
	684	+ * try_to_unlazy - try to switch to ref-walk mode.
672	685	* @nd: nameidata pathwalk data
673		- * Returns: 0 on success, -ECHILD on failure
	686	+ * Returns: true on success, false on failure
674	687	*
675		- * unlazy_walk attempts to legitimize the current nd->path and nd->root
	688	+ * try_to_unlazy attempts to legitimize the current nd->path and nd->root
676	689	* for ref-walk mode.
677	690	* Must be called from rcu-walk context.
678		- * Nothing should touch nameidata between unlazy_walk() failure and
	691	+ * Nothing should touch nameidata between try_to_unlazy() failure and
679	692	* terminate_walk().
680	693	*/
681		-static int unlazy_walk(struct nameidata *nd)
	694	+static bool try_to_unlazy(struct nameidata *nd)
682	695	{
683	696	struct dentry *parent = nd->path.dentry;
684	697
..	..	@@ -686,42 +699,37 @@
686	699
687	700	nd->flags &= ~LOOKUP_RCU;
688	701	if (unlikely(!legitimize_links(nd)))
689		- goto out2;
690		- if (unlikely(!legitimize_path(nd, &nd->path, nd->seq)))
691	702	goto out1;
692		- if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
693		- if (unlikely(!legitimize_path(nd, &nd->root, nd->root_seq)))
694		- goto out;
695		- }
	703	+ if (unlikely(!legitimize_path(nd, &nd->path, nd->seq)))
	704	+ goto out;
	705	+ if (unlikely(!legitimize_root(nd)))
	706	+ goto out;
696	707	rcu_read_unlock();
697	708	BUG_ON(nd->inode != parent->d_inode);
698		- return 0;
	709	+ return true;
699	710
700		-out2:
	711	+out1:
701	712	nd->path.mnt = NULL;
702	713	nd->path.dentry = NULL;
703		-out1:
704		- if (!(nd->flags & LOOKUP_ROOT))
705		- nd->root.mnt = NULL;
706	714	out:
707	715	rcu_read_unlock();
708		- return -ECHILD;
	716	+ return false;
709	717	}
710	718
711	719	/**
712		- * unlazy_child - try to switch to ref-walk mode.
	720	+ * try_to_unlazy_next - try to switch to ref-walk mode.
713	721	* @nd: nameidata pathwalk data
714		- * @dentry: child of nd->path.dentry
715		- * @seq: seq number to check dentry against
716		- * Returns: 0 on success, -ECHILD on failure
	722	+ * @dentry: next dentry to step into
	723	+ * @seq: seq number to check @dentry against
	724	+ * Returns: true on success, false on failure
717	725	*
718		- * unlazy_child attempts to legitimize the current nd->path, nd->root and dentry
719		- * for ref-walk mode. @dentry must be a path found by a do_lookup call on
720		- * @nd. Must be called from rcu-walk context.
721		- * Nothing should touch nameidata between unlazy_child() failure and
	726	+ * Similar to try_to_unlazy(), but here we have the next dentry already
	727	+ * picked by rcu-walk and want to legitimize that in addition to the current
	728	+ * nd->path and nd->root for ref-walk mode. Must be called from rcu-walk context.
	729	+ * Nothing should touch nameidata between try_to_unlazy_next() failure and
722	730	* terminate_walk().
723	731	*/
724		-static int unlazy_child(struct nameidata *nd, struct dentry *dentry, unsigned seq)
	732	+static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsigned seq)
725	733	{
726	734	BUG_ON(!(nd->flags & LOOKUP_RCU));
727	735
..	..	@@ -742,25 +750,16 @@
742	750	*/
743	751	if (unlikely(!lockref_get_not_dead(&dentry->d_lockref)))
744	752	goto out;
745		- if (unlikely(read_seqcount_retry(&dentry->d_seq, seq))) {
746		- rcu_read_unlock();
747		- dput(dentry);
748		- goto drop_root_mnt;
749		- }
	753	+ if (unlikely(read_seqcount_retry(&dentry->d_seq, seq)))
	754	+ goto out_dput;
750	755	/*
751	756	* Sequence counts matched. Now make sure that the root is
752	757	* still valid and get it if required.
753	758	*/
754		- if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
755		- if (unlikely(!legitimize_path(nd, &nd->root, nd->root_seq))) {
756		- rcu_read_unlock();
757		- dput(dentry);
758		- return -ECHILD;
759		- }
760		- }
761		-
	759	+ if (unlikely(!legitimize_root(nd)))
	760	+ goto out_dput;
762	761	rcu_read_unlock();
763		- return 0;
	762	+ return true;
764	763
765	764	out2:
766	765	nd->path.mnt = NULL;
..	..	@@ -768,10 +767,11 @@
768	767	nd->path.dentry = NULL;
769	768	out:
770	769	rcu_read_unlock();
771		-drop_root_mnt:
772		- if (!(nd->flags & LOOKUP_ROOT))
773		- nd->root.mnt = NULL;
774		- return -ECHILD;
	770	+ return false;
	771	+out_dput:
	772	+ rcu_read_unlock();
	773	+ dput(dentry);
	774	+ return false;
775	775	}
776	776
777	777	static inline int d_revalidate(struct dentry *dentry, unsigned int flags)
..	..	@@ -873,10 +873,36 @@
873	873	int status;
874	874
875	875	if (nd->flags & LOOKUP_RCU) {
876		- if (!(nd->flags & LOOKUP_ROOT))
	876	+ /*
	877	+ * We don't want to zero nd->root for scoped-lookups or
	878	+ * externally-managed nd->root.
	879	+ */
	880	+ if (!(nd->flags & (LOOKUP_ROOT | LOOKUP_IS_SCOPED)))
877	881	nd->root.mnt = NULL;
878		- if (unlikely(unlazy_walk(nd)))
	882	+ nd->flags &= ~LOOKUP_CACHED;
	883	+ if (!try_to_unlazy(nd))
879	884	return -ECHILD;
	885	+ }
	886	+
	887	+ if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) {
	888	+ /*
	889	+ * While the guarantee of LOOKUP_IS_SCOPED is (roughly) "don't
	890	+ * ever step outside the root during lookup" and should already
	891	+ * be guaranteed by the rest of namei, we want to avoid a namei
	892	+ * BUG resulting in userspace being given a path that was not
	893	+ * scoped within the root at some point during the lookup.
	894	+ *
	895	+ * So, do a final sanity-check to make sure that in the
	896	+ * worst-case scenario (a complete bypass of LOOKUP_IS_SCOPED)
	897	+ * we won't silently return an fd completely outside of the
	898	+ * requested root to userspace.
	899	+ *
	900	+ * Userspace could move the path outside the root after this
	901	+ * check, but as discussed elsewhere this is not a concern (the
	902	+ * resolved file was inside the root at some point).
	903	+ */
	904	+ if (!path_is_under(&nd->path, &nd->root))
	905	+ return -EXDEV;
880	906	}
881	907
882	908	if (likely(!(nd->flags & LOOKUP_JUMPED))) {
..	..	@@ -901,9 +927,17 @@
901	927	return status;
902	928	}
903	929
904		-static void set_root(struct nameidata *nd)
	930	+static int set_root(struct nameidata *nd)
905	931	{
906	932	struct fs_struct *fs = current->fs;
	933	+
	934	+ /*
	935	+ * Jumping to the real root in a scoped-lookup is a BUG in namei, but we
	936	+ * still have to ensure it doesn't happen because it will cause a breakout
	937	+ * from the dirfd.
	938	+ */
	939	+ if (WARN_ON(nd->flags & LOOKUP_IS_SCOPED))
	940	+ return -ENOTRECOVERABLE;
907	941
908	942	if (nd->flags & LOOKUP_RCU) {
909	943	unsigned seq;
..	..	@@ -915,30 +949,25 @@
915	949	} while (read_seqcount_retry(&fs->seq, seq));
916	950	} else {
917	951	get_fs_root(fs, &nd->root);
	952	+ nd->flags |= LOOKUP_ROOT_GRABBED;
918	953	}
919		-}
920		-
921		-static void path_put_conditional(struct path *path, struct nameidata *nd)
922		-{
923		- dput(path->dentry);
924		- if (path->mnt != nd->path.mnt)
925		- mntput(path->mnt);
926		-}
927		-
928		-static inline void path_to_nameidata(const struct path *path,
929		- struct nameidata *nd)
930		-{
931		- if (!(nd->flags & LOOKUP_RCU)) {
932		- dput(nd->path.dentry);
933		- if (nd->path.mnt != path->mnt)
934		- mntput(nd->path.mnt);
935		- }
936		- nd->path.mnt = path->mnt;
937		- nd->path.dentry = path->dentry;
	954	+ return 0;
938	955	}
939	956
940	957	static int nd_jump_root(struct nameidata *nd)
941	958	{
	959	+ if (unlikely(nd->flags & LOOKUP_BENEATH))
	960	+ return -EXDEV;
	961	+ if (unlikely(nd->flags & LOOKUP_NO_XDEV)) {
	962	+ /* Absolute path arguments to path_init() are allowed. */
	963	+ if (nd->path.mnt != NULL && nd->path.mnt != nd->root.mnt)
	964	+ return -EXDEV;
	965	+ }
	966	+ if (!nd->root.mnt) {
	967	+ int error = set_root(nd);
	968	+ if (error)
	969	+ return error;
	970	+ }
942	971	if (nd->flags & LOOKUP_RCU) {
943	972	struct dentry *d;
944	973	nd->path = nd->root;
..	..	@@ -961,14 +990,32 @@
961	990	* Helper to directly jump to a known parsed path from ->get_link,
962	991	* caller must have taken a reference to path beforehand.
963	992	*/
964		-void nd_jump_link(struct path *path)
	993	+int nd_jump_link(struct path *path)
965	994	{
	995	+ int error = -ELOOP;
966	996	struct nameidata *nd = current->nameidata;
967		- path_put(&nd->path);
968	997
	998	+ if (unlikely(nd->flags & LOOKUP_NO_MAGICLINKS))
	999	+ goto err;
	1000	+
	1001	+ error = -EXDEV;
	1002	+ if (unlikely(nd->flags & LOOKUP_NO_XDEV)) {
	1003	+ if (nd->path.mnt != path->mnt)
	1004	+ goto err;
	1005	+ }
	1006	+ /* Not currently safe for scoped-lookups. */
	1007	+ if (unlikely(nd->flags & LOOKUP_IS_SCOPED))
	1008	+ goto err;
	1009	+
	1010	+ path_put(&nd->path);
969	1011	nd->path = *path;
970	1012	nd->inode = nd->path.dentry->d_inode;
971	1013	nd->flags |= LOOKUP_JUMPED;
	1014	+ return 0;
	1015	+
	1016	+err:
	1017	+ path_put(path);
	1018	+ return error;
972	1019	}
973	1020
974	1021	static inline void put_link(struct nameidata *nd)
..	..	@@ -999,35 +1046,28 @@
999	1046	*
1000	1047	* Returns 0 if following the symlink is allowed, -ve on error.
1001	1048	*/
1002		-static inline int may_follow_link(struct nameidata *nd)
	1049	+static inline int may_follow_link(struct nameidata *nd, const struct inode *inode)
1003	1050	{
1004		- const struct inode *inode;
1005		- const struct inode *parent;
1006		- kuid_t puid;
1007		-
1008	1051	if (!sysctl_protected_symlinks)
1009	1052	return 0;
1010	1053
1011	1054	/* Allowed if owner and follower match. */
1012		- inode = nd->link_inode;
1013	1055	if (uid_eq(current_cred()->fsuid, inode->i_uid))
1014	1056	return 0;
1015	1057
1016	1058	/* Allowed if parent directory not sticky and world-writable. */
1017		- parent = nd->inode;
1018		- if ((parent->i_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH))
	1059	+ if ((nd->dir_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH))
1019	1060	return 0;
1020	1061
1021	1062	/* Allowed if parent directory and link owner match. */
1022		- puid = parent->i_uid;
1023		- if (uid_valid(puid) && uid_eq(puid, inode->i_uid))
	1063	+ if (uid_valid(nd->dir_uid) && uid_eq(nd->dir_uid, inode->i_uid))
1024	1064	return 0;
1025	1065
1026	1066	if (nd->flags & LOOKUP_RCU)
1027	1067	return -ECHILD;
1028	1068
1029	1069	audit_inode(nd->name, nd->stack[0].link.dentry, 0);
1030		- audit_log_link_denied("follow_link");
	1070	+ audit_log_path_denied(AUDIT_ANOM_LINK, "follow_link");
1031	1071	return -EACCES;
1032	1072	}
1033	1073
..	..	@@ -1078,7 +1118,7 @@
1078	1118	*
1079	1119	* Returns 0 if successful, -ve on error.
1080	1120	*/
1081		-static int may_linkat(struct path *link)
	1121	+int may_linkat(struct path *link)
1082	1122	{
1083	1123	struct inode *inode = link->dentry->d_inode;
1084	1124
..	..	@@ -1095,7 +1135,7 @@
1095	1136	if (safe_hardlink_source(inode) || inode_owner_or_capable(inode))
1096	1136	return 0;
1097	1137
1098		- audit_log_link_denied("linkat");
	1138	+ audit_log_path_denied(AUDIT_ANOM_LINK, "linkat");
1099	1139	return -EPERM;
1100	1140	}
1101	1141
..	..	@@ -1134,64 +1174,13 @@
1134	1174	(dir_mode & 0020 &&
1135	1175	((sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) ||
1136	1176	(sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode))))) {
	1177	+ const char *operation = S_ISFIFO(inode->i_mode) ?
	1178	+ "sticky_create_fifo" :
	1179	+ "sticky_create_regular";
	1180	+ audit_log_path_denied(AUDIT_ANOM_CREAT, operation);
1137	1181	return -EACCES;
1138	1182	}
1139	1183	return 0;
1140		-}
1141		-
1142		-static __always_inline
1143		-const char *get_link(struct nameidata *nd)
1144		-{
1145		- struct saved *last = nd->stack + nd->depth - 1;
1146		- struct dentry *dentry = last->link.dentry;
1147		- struct inode *inode = nd->link_inode;
1148		- int error;
1149		- const char *res;
1150		-
1151		- if (!(nd->flags & LOOKUP_RCU)) {
1152		- touch_atime(&last->link);
1153		- cond_resched();
1154		- } else if (atime_needs_update(&last->link, inode)) {
1155		- if (unlikely(unlazy_walk(nd)))
1156		- return ERR_PTR(-ECHILD);
1157		- touch_atime(&last->link);
1158		- }
1159		-
1160		- error = security_inode_follow_link(dentry, inode,
1161		- nd->flags & LOOKUP_RCU);
1162		- if (unlikely(error))
1163		- return ERR_PTR(error);
1164		-
1165		- nd->last_type = LAST_BIND;
1166		- res = READ_ONCE(inode->i_link);
1167		- if (!res) {
1168		- const char * (*get)(struct dentry *, struct inode *,
1169		- struct delayed_call *);
1170		- get = inode->i_op->get_link;
1171		- if (nd->flags & LOOKUP_RCU) {
1172		- res = get(NULL, inode, &last->done);
1173		- if (res == ERR_PTR(-ECHILD)) {
1174		- if (unlikely(unlazy_walk(nd)))
1175		- return ERR_PTR(-ECHILD);
1176		- res = get(dentry, inode, &last->done);
1177		- }
1178		- } else {
1179		- res = get(dentry, inode, &last->done);
1180		- }
1181		- if (IS_ERR_OR_NULL(res))
1182		- return res;
1183		- }
1184		- if (*res == '/') {
1185		- if (!nd->root.mnt)
1186		- set_root(nd);
1187		- if (unlikely(nd_jump_root(nd)))
1188		- return ERR_PTR(-ECHILD);
1189		- while (unlikely(*++res == '/'))
1190		- ;
1191		- }
1192		- if (!*res)
1193		- res = NULL;
1194		- return res;
1195	1184	}
1196	1185
1197	1186	/*
..	..	@@ -1227,19 +1216,59 @@
1227	1216	}
1228	1217	EXPORT_SYMBOL(follow_up);
1229	1218
	1219	+static bool choose_mountpoint_rcu(struct mount *m, const struct path *root,
	1220	+ struct path *path, unsigned *seqp)
	1221	+{
	1222	+ while (mnt_has_parent(m)) {
	1223	+ struct dentry *mountpoint = m->mnt_mountpoint;
	1224	+
	1225	+ m = m->mnt_parent;
	1226	+ if (unlikely(root->dentry == mountpoint &&
	1227	+ root->mnt == &m->mnt))
	1228	+ break;
	1229	+ if (mountpoint != m->mnt.mnt_root) {
	1230	+ path->mnt = &m->mnt;
	1231	+ path->dentry = mountpoint;
	1232	+ *seqp = read_seqcount_begin(&mountpoint->d_seq);
	1233	+ return true;
	1234	+ }
	1235	+ }
	1236	+ return false;
	1237	+}
	1238	+
	1239	+static bool choose_mountpoint(struct mount *m, const struct path *root,
	1240	+ struct path *path)
	1241	+{
	1242	+ bool found;
	1243	+
	1244	+ rcu_read_lock();
	1245	+ while (1) {
	1246	+ unsigned seq, mseq = read_seqbegin(&mount_lock);
	1247	+
	1248	+ found = choose_mountpoint_rcu(m, root, path, &seq);
	1249	+ if (unlikely(!found)) {
	1250	+ if (!read_seqretry(&mount_lock, mseq))
	1251	+ break;
	1252	+ } else {
	1253	+ if (likely(__legitimize_path(path, seq, mseq)))
	1254	+ break;
	1255	+ rcu_read_unlock();
	1256	+ path_put(path);
	1257	+ rcu_read_lock();
	1258	+ }
	1259	+ }
	1260	+ rcu_read_unlock();
	1261	+ return found;
	1262	+}
	1263	+
1230	1264	/*
1231	1265	* Perform an automount
1232	1266	* - return -EISDIR to tell follow_managed() to stop and return the path we
1233	1267	* were called with.
1234	1268	*/
1235		-static int follow_automount(struct path *path, struct nameidata *nd,
1236		- bool *need_mntput)
	1269	+static int follow_automount(struct path *path, int *count, unsigned lookup_flags)
1237	1270	{
1238		- struct vfsmount *mnt;
1239		- int err;
1240		-
1241		- if (!path->dentry->d_op || !path->dentry->d_op->d_automount)
1242		- return -EREMOTE;
	1271	+ struct dentry *dentry = path->dentry;
1243	1272
1244	1273	/* We don't want to mount if someone's just doing a stat -
1245	1274	* unless they're stat'ing a directory and appended a '/' to
..	..	@@ -1252,129 +1281,89 @@
1252	1281	* as being automount points. These will need the attentions
1253	1282	* of the daemon to instantiate them before they can be used.
1254	1283	*/
1255		- if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
	1284	+ if (!(lookup_flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
1256	1285	LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
1257		- path->dentry->d_inode)
	1286	+ dentry->d_inode)
1258	1287	return -EISDIR;
1259	1288
1260		- nd->total_link_count++;
1261		- if (nd->total_link_count >= 40)
	1289	+ if (count && (*count)++ >= MAXSYMLINKS)
1262	1290	return -ELOOP;
1263	1291
1264		- mnt = path->dentry->d_op->d_automount(path);
1265		- if (IS_ERR(mnt)) {
1266		- /*
1267		- * The filesystem is allowed to return -EISDIR here to indicate
1268		- * it doesn't want to automount. For instance, autofs would do
1269		- * this so that its userspace daemon can mount on this dentry.
1270		- *
1271		- * However, we can only permit this if it's a terminal point in
1272		- * the path being looked up; if it wasn't then the remainder of
1273		- * the path is inaccessible and we should say so.
1274		- */
1275		- if (PTR_ERR(mnt) == -EISDIR && (nd->flags & LOOKUP_PARENT))
1276		- return -EREMOTE;
1277		- return PTR_ERR(mnt);
1278		- }
1279		-
1280		- if (!mnt) /* mount collision */
1281		- return 0;
1282		-
1283		- if (!*need_mntput) {
1284		- /* lock_mount() may release path->mnt on error */
1285		- mntget(path->mnt);
1286		- *need_mntput = true;
1287		- }
1288		- err = finish_automount(mnt, path);
1289		-
1290		- switch (err) {
1291		- case -EBUSY:
1292		- /* Someone else made a mount here whilst we were busy */
1293		- return 0;
1294		- case 0:
1295		- path_put(path);
1296		- path->mnt = mnt;
1297		- path->dentry = dget(mnt->mnt_root);
1298		- return 0;
1299		- default:
1300		- return err;
1301		- }
1302		-
	1292	+ return finish_automount(dentry->d_op->d_automount(path), path);
1303	1293	}
1304	1294
1305	1295	/*
1306		- * Handle a dentry that is managed in some way.
1307		- * - Flagged for transit management (autofs)
1308		- * - Flagged as mountpoint
1309		- * - Flagged as automount point
1310		- *
1311		- * This may only be called in refwalk mode.
1312		- *
1313		- * Serialization is taken care of in namespace.c
	1296	+ * mount traversal - out-of-line part. One note on ->d_flags accesses -
	1297	+ * dentries are pinned but not locked here, so negative dentry can go
	1298	+ * positive right under us. Use of smp_load_acquire() provides a barrier
	1299	+ * sufficient for ->d_inode and ->d_flags consistency.
1314	1300	*/
1315		-static int follow_managed(struct path *path, struct nameidata *nd)
	1301	+static int __traverse_mounts(struct path *path, unsigned flags, bool *jumped,
	1302	+ int *count, unsigned lookup_flags)
1316	1303	{
1317		- struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */
1318		- unsigned managed;
	1304	+ struct vfsmount *mnt = path->mnt;
1319	1305	bool need_mntput = false;
1320	1306	int ret = 0;
1321	1307
1322		- /* Given that we're not holding a lock here, we retain the value in a
1323		- * local variable for each dentry as we look at it so that we don't see
1324		- * the components of that value change under us */
1325		- while (managed = READ_ONCE(path->dentry->d_flags),
1326		- managed &= DCACHE_MANAGED_DENTRY,
1327		- unlikely(managed != 0)) {
	1308	+ while (flags & DCACHE_MANAGED_DENTRY) {
1328	1309	/* Allow the filesystem to manage the transit without i_mutex
1329	1310	* being held. */
1330		- if (managed & DCACHE_MANAGE_TRANSIT) {
1331		- BUG_ON(!path->dentry->d_op);
1332		- BUG_ON(!path->dentry->d_op->d_manage);
	1311	+ if (flags & DCACHE_MANAGE_TRANSIT) {
1333	1312	ret = path->dentry->d_op->d_manage(path, false);
	1313	+ flags = smp_load_acquire(&path->dentry->d_flags);
1334	1314	if (ret < 0)
1335	1315	break;
1336	1316	}
1337	1317
1338		- /* Transit to a mounted filesystem. */
1339		- if (managed & DCACHE_MOUNTED) {
	1318	+ if (flags & DCACHE_MOUNTED) { // something's mounted on it..
1340	1319	struct vfsmount *mounted = lookup_mnt(path);
1341		- if (mounted) {
	1320	+ if (mounted) { // ... in our namespace
1342	1321	dput(path->dentry);
1343	1322	if (need_mntput)
1344	1323	mntput(path->mnt);
1345	1324	path->mnt = mounted;
1346	1325	path->dentry = dget(mounted->mnt_root);
	1326	+ // here we know it's positive
	1327	+ flags = path->dentry->d_flags;
1347	1328	need_mntput = true;
1348	1329	continue;
1349	1330	}
1350		-
1351		- /* Something is mounted on this dentry in another
1352		- * namespace and/or whatever was mounted there in this
1353		- * namespace got unmounted before lookup_mnt() could
1354		- * get it */
1355	1331	}
1356	1332
1357		- /* Handle an automount point */
1358		- if (managed & DCACHE_NEED_AUTOMOUNT) {
1359		- ret = follow_automount(path, nd, &need_mntput);
1360		- if (ret < 0)
1361		- break;
1362		- continue;
1363		- }
	1333	+ if (!(flags & DCACHE_NEED_AUTOMOUNT))
	1334	+ break;
1364	1335
1365		- /* We didn't change the current path point */
1366		- break;
	1336	+ // uncovered automount point
	1337	+ ret = follow_automount(path, count, lookup_flags);
	1338	+ flags = smp_load_acquire(&path->dentry->d_flags);
	1339	+ if (ret < 0)
	1340	+ break;
1367	1341	}
1368	1342
	1343	+ if (ret == -EISDIR)
	1344	+ ret = 0;
	1345	+ // possible if you race with several mount --move
1369	1346	if (need_mntput && path->mnt == mnt)
1370	1347	mntput(path->mnt);
1371		- if (ret == -EISDIR || !ret)
1372		- ret = 1;
1373		- if (need_mntput)
1374		- nd->flags |= LOOKUP_JUMPED;
1375		- if (unlikely(ret < 0))
1376		- path_put_conditional(path, nd);
	1348	+ if (!ret && unlikely(d_flags_negative(flags)))
	1349	+ ret = -ENOENT;
	1350	+ *jumped = need_mntput;
1377	1351	return ret;
	1352	+}
	1353	+
	1354	+static inline int traverse_mounts(struct path *path, bool *jumped,
	1355	+ int *count, unsigned lookup_flags)
	1356	+{
	1357	+ unsigned flags = smp_load_acquire(&path->dentry->d_flags);
	1358	+
	1359	+ /* fastpath */
	1360	+ if (likely(!(flags & DCACHE_MANAGED_DENTRY))) {
	1361	+ *jumped = false;
	1362	+ if (unlikely(d_flags_negative(flags)))
	1363	+ return -ENOENT;
	1364	+ return 0;
	1365	+ }
	1366	+ return __traverse_mounts(path, flags, jumped, count, lookup_flags);
1378	1367	}
1379	1368
1380	1369	int follow_down_one(struct path *path)
..	..	@@ -1393,11 +1382,22 @@
1393	1382	}
1394	1383	EXPORT_SYMBOL(follow_down_one);
1395	1384
1396		-static inline int managed_dentry_rcu(const struct path *path)
	1385	+/*
	1386	+ * Follow down to the covering mount currently visible to userspace. At each
	1387	+ * point, the filesystem owning that dentry may be queried as to whether the
	1388	+ * caller is permitted to proceed or not.
	1389	+ */
	1390	+int follow_down(struct path *path)
1397	1391	{
1398		- return (path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) ?
1399		- path->dentry->d_op->d_manage(path, true) : 0;
	1392	+ struct vfsmount *mnt = path->mnt;
	1393	+ bool jumped;
	1394	+ int ret = traverse_mounts(path, &jumped, NULL, 0);
	1395	+
	1396	+ if (path->mnt != mnt)
	1397	+ mntput(mnt);
	1398	+ return ret;
1400	1399	}
	1400	+EXPORT_SYMBOL(follow_down);
1401	1401
1402	1402	/*
1403	1403	* Try to skip to top of mountpoint pile in rcuwalk mode. Fail if
..	..	@@ -1406,190 +1406,90 @@
1406	1406	static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
1407	1407	struct inode **inode, unsigned *seqp)
1408	1408	{
	1409	+ struct dentry *dentry = path->dentry;
	1410	+ unsigned int flags = dentry->d_flags;
	1411	+
	1412	+ if (likely(!(flags & DCACHE_MANAGED_DENTRY)))
	1413	+ return true;
	1414	+
	1415	+ if (unlikely(nd->flags & LOOKUP_NO_XDEV))
	1416	+ return false;
	1417	+
1409	1418	for (;;) {
1410		- struct mount *mounted;
1411	1419	/*
1412	1420	* Don't forget we might have a non-mountpoint managed dentry
1413	1421	* that wants to block transit.
1414	1422	*/
1415		- switch (managed_dentry_rcu(path)) {
1416		- case -ECHILD:
1417		- default:
1418		- return false;
1419		- case -EISDIR:
1420		- return true;
1421		- case 0:
1422		- break;
	1423	+ if (unlikely(flags & DCACHE_MANAGE_TRANSIT)) {
	1424	+ int res = dentry->d_op->d_manage(path, true);
	1425	+ if (res)
	1426	+ return res == -EISDIR;
	1427	+ flags = dentry->d_flags;
1423	1428	}
1424	1429
1425		- if (!d_mountpoint(path->dentry))
1426		- return !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT);
1427		-
1428		- mounted = __lookup_mnt(path->mnt, path->dentry);
1429		- if (!mounted)
1430		- break;
1431		- path->mnt = &mounted->mnt;
1432		- path->dentry = mounted->mnt.mnt_root;
1433		- nd->flags \|= LOOKUP_JUMPED;
1434		- *seqp = read_seqcount_begin(&path->dentry->d_seq);
1435		- /*
1436		- * Update the inode too. We don't need to re-check the
1437		- * dentry sequence number here after this d_inode read,
1438		- * because a mount-point is always pinned.
1439		- */
1440		- *inode = path->dentry->d_inode;
1441		- }
1442		- return !read_seqretry(&mount_lock, nd->m_seq) &&
1443		- !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT);
1444		-}
1445		-
1446		-static int follow_dotdot_rcu(struct nameidata *nd)
1447		-{
1448		- struct inode *inode = nd->inode;
1449		-
1450		- while (1) {
1451		- if (path_equal(&nd->path, &nd->root))
1452		- break;
1453		- if (nd->path.dentry != nd->path.mnt->mnt_root) {
1454		- struct dentry *old = nd->path.dentry;
1455		- struct dentry *parent = old->d_parent;
1456		- unsigned seq;
1457		-
1458		- inode = parent->d_inode;
1459		- seq = read_seqcount_begin(&parent->d_seq);
1460		- if (unlikely(read_seqcount_retry(&old->d_seq, nd->seq)))
1461		- return -ECHILD;
1462		- nd->path.dentry = parent;
1463		- nd->seq = seq;
1464		- if (unlikely(!path_connected(&nd->path)))
1465		- return -ECHILD;
1466		- break;
1467		- } else {
1468		- struct mount *mnt = real_mount(nd->path.mnt);
1469		- struct mount *mparent = mnt->mnt_parent;
1470		- struct dentry *mountpoint = mnt->mnt_mountpoint;
1471		- struct inode *inode2 = mountpoint->d_inode;
1472		- unsigned seq = read_seqcount_begin(&mountpoint->d_seq);
1473		- if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
1474		- return -ECHILD;
1475		- if (&mparent->mnt == nd->path.mnt)
1476		- break;
1477		- /* we know that mountpoint was pinned */
1478		- nd->path.dentry = mountpoint;
1479		- nd->path.mnt = &mparent->mnt;
1480		- inode = inode2;
1481		- nd->seq = seq;
	1430	+ if (flags & DCACHE_MOUNTED) {
	1431	+ struct mount *mounted = __lookup_mnt(path->mnt, dentry);
	1432	+ if (mounted) {
	1433	+ path->mnt = &mounted->mnt;
	1434	+ dentry = path->dentry = mounted->mnt.mnt_root;
	1435	+ nd->flags \|= LOOKUP_JUMPED;
	1436	+ *seqp = read_seqcount_begin(&dentry->d_seq);
	1437	+ *inode = dentry->d_inode;
	1438	+ /*
	1439	+ * We don't need to re-check ->d_seq after this
	1440	+ * ->d_inode read - there will be an RCU delay
	1441	+ * between mount hash removal and ->mnt_root
	1442	+ * becoming unpinned.
	1443	+ */
	1444	+ flags = dentry->d_flags;
	1445	+ if (read_seqretry(&mount_lock, nd->m_seq))
	1446	+ return false;
	1447	+ continue;
	1448	+ }
	1449	+ if (read_seqretry(&mount_lock, nd->m_seq))
	1450	+ return false;
1482	1451	}
	1452	+ return !(flags & DCACHE_NEED_AUTOMOUNT);
1483	1453	}
1484		- while (unlikely(d_mountpoint(nd->path.dentry))) {
1485		- struct mount *mounted;
1486		- mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry);
1487		- if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
1488		- return -ECHILD;
1489		- if (!mounted)
1490		- break;
1491		- nd->path.mnt = &mounted->mnt;
1492		- nd->path.dentry = mounted->mnt.mnt_root;
1493		- inode = nd->path.dentry->d_inode;
1494		- nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
1495		- }
1496		- nd->inode = inode;
1497		- return 0;
1498	1454	}
1499	1455
1500		-/*
1501		- * Follow down to the covering mount currently visible to userspace. At each
1502		- * point, the filesystem owning that dentry may be queried as to whether the
1503		- * caller is permitted to proceed or not.
1504		- */
1505		-int follow_down(struct path *path)
	1456	+static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
	1457	+ struct path *path, struct inode **inode,
	1458	+ unsigned int *seqp)
1506	1459	{
1507		- unsigned managed;
	1460	+ bool jumped;
1508	1461	int ret;
1509	1462
1510		- while (managed = READ_ONCE(path->dentry->d_flags),
1511		- unlikely(managed & DCACHE_MANAGED_DENTRY)) {
1512		- /* Allow the filesystem to manage the transit without i_mutex
1513		- * being held.
1514		- *
1515		- * We indicate to the filesystem if someone is trying to mount
1516		- * something here. This gives autofs the chance to deny anyone
1517		- * other than its daemon the right to mount on its
1518		- * superstructure.
1519		- *
1520		- * The filesystem may sleep at this point.
1521		- */
1522		- if (managed & DCACHE_MANAGE_TRANSIT) {
1523		- BUG_ON(!path->dentry->d_op);
1524		- BUG_ON(!path->dentry->d_op->d_manage);
1525		- ret = path->dentry->d_op->d_manage(path, false);
1526		- if (ret < 0)
1527		- return ret == -EISDIR ? 0 : ret;
1528		- }
1529		-
1530		- /* Transit to a mounted filesystem. */
1531		- if (managed & DCACHE_MOUNTED) {
1532		- struct vfsmount *mounted = lookup_mnt(path);
1533		- if (!mounted)
1534		- break;
1535		- dput(path->dentry);
1536		- mntput(path->mnt);
1537		- path->mnt = mounted;
1538		- path->dentry = dget(mounted->mnt_root);
1539		- continue;
1540		- }
1541		-
1542		- /* Don't handle automount points here */
1543		- break;
	1463	+ path->mnt = nd->path.mnt;
	1464	+ path->dentry = dentry;
	1465	+ if (nd->flags & LOOKUP_RCU) {
	1466	+ unsigned int seq = *seqp;
	1467	+ if (unlikely(!*inode))
	1468	+ return -ENOENT;
	1469	+ if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
	1470	+ return 0;
	1471	+ if (!try_to_unlazy_next(nd, dentry, seq))
	1472	+ return -ECHILD;
	1473	+ // *path might've been clobbered by __follow_mount_rcu()
	1474	+ path->mnt = nd->path.mnt;
	1475	+ path->dentry = dentry;
1544	1476	}
1545		- return 0;
1546		-}
1547		-EXPORT_SYMBOL(follow_down);
1548		-
1549		-/*
1550		- * Skip to top of mountpoint pile in refwalk mode for follow_dotdot()
1551		- */
1552		-static void follow_mount(struct path *path)
1553		-{
1554		- while (d_mountpoint(path->dentry)) {
1555		- struct vfsmount *mounted = lookup_mnt(path);
1556		- if (!mounted)
1557		- break;
	1477	+ ret = traverse_mounts(path, &jumped, &nd->total_link_count, nd->flags);
	1478	+ if (jumped) {
	1479	+ if (unlikely(nd->flags & LOOKUP_NO_XDEV))
	1480	+ ret = -EXDEV;
	1481	+ else
	1482	+ nd->flags \|= LOOKUP_JUMPED;
	1483	+ }
	1484	+ if (unlikely(ret)) {
1558	1485	dput(path->dentry);
1559		- mntput(path->mnt);
1560		- path->mnt = mounted;
1561		- path->dentry = dget(mounted->mnt_root);
	1486	+ if (path->mnt != nd->path.mnt)
	1487	+ mntput(path->mnt);
	1488	+ } else {
	1489	+ *inode = d_backing_inode(path->dentry);
	1490	+ *seqp = 0; /* out of RCU mode, so the value doesn't matter */
1562	1491	}
1563		-}
1564		-
1565		-static int path_parent_directory(struct path *path)
1566		-{
1567		- struct dentry *old = path->dentry;
1568		- /* rare case of legitimate dget_parent()... */
1569		- path->dentry = dget_parent(path->dentry);
1570		- dput(old);
1571		- if (unlikely(!path_connected(path)))
1572		- return -ENOENT;
1573		- return 0;
1574		-}
1575		-
1576		-static int follow_dotdot(struct nameidata *nd)
1577		-{
1578		- while(1) {
1579		- if (path_equal(&nd->path, &nd->root))
1580		- break;
1581		- if (nd->path.dentry != nd->path.mnt->mnt_root) {
1582		- int ret = path_parent_directory(&nd->path);
1583		- if (ret)
1584		- return ret;
1585		- break;
1586		- }
1587		- if (!follow_up(&nd->path))
1588		- break;
1589		- }
1590		- follow_mount(&nd->path);
1591		- nd->inode = nd->path.dentry->d_inode;
1592		- return 0;
	1492	+ return ret;
1593	1493	}
1594	1494
1595	1495	/*
..	..	@@ -1646,14 +1546,12 @@
1646	1546	return dentry;
1647	1547	}
1648	1548
1649		-static int lookup_fast(struct nameidata *nd,
1650		- struct path *path, struct inode **inode,
1651		- unsigned *seqp)
	1549	+static struct dentry *lookup_fast(struct nameidata *nd,
	1550	+ struct inode **inode,
	1551	+ unsigned *seqp)
1652	1552	{
1653		- struct vfsmount *mnt = nd->path.mnt;
1654	1553	struct dentry *dentry, *parent = nd->path.dentry;
1655	1554	int status = 1;
1656		- int err;
1657	1555
1658	1556	/*
1659	1557	* Rename seqlock is not required here because in the off chance
..	..	@@ -1662,12 +1560,11 @@
1662	1560	*/
1663	1561	if (nd->flags & LOOKUP_RCU) {
1664	1562	unsigned seq;
1665		- bool negative;
1666	1563	dentry = __d_lookup_rcu(parent, &nd->last, &seq);
1667	1564	if (unlikely(!dentry)) {
1668		- if (unlazy_walk(nd))
1669		- return -ECHILD;
1670		- return 0;
	1565	+ if (!try_to_unlazy(nd))
	1566	+ return ERR_PTR(-ECHILD);
	1567	+ return NULL;
1671	1568	}
1672	1569
1673	1570	/*
..	..	@@ -1675,9 +1572,8 @@
1675	1572	* the dentry name information from lookup.
1676	1573	*/
1677	1574	*inode = d_backing_inode(dentry);
1678		- negative = d_is_negative(dentry);
1679	1575	if (unlikely(read_seqcount_retry(&dentry->d_seq, seq)))
1680		- return -ECHILD;
	1576	+ return ERR_PTR(-ECHILD);
1681	1577
1682	1578	/*
1683	1579	* This sequence count validates that the parent had no
..	..	@@ -1687,50 +1583,30 @@
1687	1583	* enough, we can use __read_seqcount_retry here.
1688	1584	*/
1689	1585	if (unlikely(__read_seqcount_retry(&parent->d_seq, nd->seq)))
1690		- return -ECHILD;
	1586	+ return ERR_PTR(-ECHILD);
1691	1587
1692	1588	*seqp = seq;
1693	1589	status = d_revalidate(dentry, nd->flags);
1694		- if (likely(status > 0)) {
1695		- /*
1696		- * Note: do negative dentry check after revalidation in
1697		- * case that drops it.
1698		- */
1699		- if (unlikely(negative))
1700		- return -ENOENT;
1701		- path->mnt = mnt;
1702		- path->dentry = dentry;
1703		- if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
1704		- return 1;
1705		- }
1706		- if (unlazy_child(nd, dentry, seq))
1707		- return -ECHILD;
	1590	+ if (likely(status > 0))
	1591	+ return dentry;
	1592	+ if (!try_to_unlazy_next(nd, dentry, seq))
	1593	+ return ERR_PTR(-ECHILD);
1708	1594	if (unlikely(status == -ECHILD))
1709	1595	/* we'd been told to redo it in non-rcu mode */
1710	1596	status = d_revalidate(dentry, nd->flags);
1711	1597	} else {
1712	1598	dentry = __d_lookup(parent, &nd->last);
1713	1599	if (unlikely(!dentry))
1714		- return 0;
	1600	+ return NULL;
1715	1601	status = d_revalidate(dentry, nd->flags);
1716	1602	}
1717	1603	if (unlikely(status <= 0)) {
1718	1604	if (!status)
1719	1605	d_invalidate(dentry);
1720	1606	dput(dentry);
1721		- return status;
	1607	+ return ERR_PTR(status);
1722	1608	}
1723		- if (unlikely(d_is_negative(dentry))) {
1724		- dput(dentry);
1725		- return -ENOENT;
1726		- }
1727		-
1728		- path->mnt = mnt;
1729		- path->dentry = dentry;
1730		- err = follow_managed(path, nd);
1731		- if (likely(err > 0))
1732		- *inode = d_backing_inode(path->dentry);
1733		- return err;
	1609	+ return dentry;
1734	1610	}
1735	1611
1736	1612	/* Fast lookup failed, do it the slow way */
..	..	@@ -1740,7 +1616,7 @@
1740	1616	{
1741	1617	struct dentry *dentry, *old;
1742	1618	struct inode *inode = dir->d_inode;
1743		- DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
	1619	+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
1744	1620
1745	1621	/* Don't go there if it's already dead */
1746	1622	if (unlikely(IS_DEADDIR(inode)))
..	..	@@ -1750,17 +1626,15 @@
1750	1626	if (IS_ERR(dentry))
1751	1627	return dentry;
1752	1628	if (unlikely(!d_in_lookup(dentry))) {
1753		- if (!(flags & LOOKUP_NO_REVAL)) {
1754		- int error = d_revalidate(dentry, flags);
1755		- if (unlikely(error <= 0)) {
1756		- if (!error) {
1757		- d_invalidate(dentry);
1758		- dput(dentry);
1759		- goto again;
1760		- }
	1629	+ int error = d_revalidate(dentry, flags);
	1630	+ if (unlikely(error <= 0)) {
	1631	+ if (!error) {
	1632	+ d_invalidate(dentry);
1761	1633	dput(dentry);
1762		- dentry = ERR_PTR(error);
	1634	+ goto again;
1763	1635	}
	1636	+ dput(dentry);
	1637	+ dentry = ERR_PTR(error);
1764	1638	}
1765	1639	} else {
1766	1640	old = inode->i_op->lookup(inode, dentry, flags);
..	..	@@ -1788,71 +1662,112 @@
1788	1662	static inline int may_lookup(struct nameidata *nd)
1789	1663	{
1790	1664	if (nd->flags & LOOKUP_RCU) {
1791		- int err = inode_permission2(nd->path.mnt, nd->inode, MAY_EXEC\|MAY_NOT_BLOCK);
1792		- if (err != -ECHILD)
	1665	+ int err = inode_permission(nd->inode, MAY_EXEC\|MAY_NOT_BLOCK);
	1666	+ if (err != -ECHILD \|\| !try_to_unlazy(nd))
1793	1667	return err;
1794		- if (unlazy_walk(nd))
1795		- return -ECHILD;
1796	1668	}
1797		- return inode_permission2(nd->path.mnt, nd->inode, MAY_EXEC);
	1669	+ return inode_permission(nd->inode, MAY_EXEC);
1798	1670	}
1799	1671
1800		-static inline int handle_dots(struct nameidata *nd, int type)
	1672	+static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
1801	1673	{
1802		- if (type == LAST_DOTDOT) {
1803		- if (!nd->root.mnt)
1804		- set_root(nd);
1805		- if (nd->flags & LOOKUP_RCU) {
1806		- return follow_dotdot_rcu(nd);
1807		- } else
1808		- return follow_dotdot(nd);
1809		- }
1810		- return 0;
1811		-}
1812		-
1813		-static int pick_link(struct nameidata *nd, struct path *link,
1814		- struct inode *inode, unsigned seq)
1815		-{
1816		- int error;
1817		- struct saved *last;
1818		- if (unlikely(nd->total_link_count++ >= MAXSYMLINKS)) {
1819		- path_to_nameidata(link, nd);
	1674	+ if (unlikely(nd->total_link_count++ >= MAXSYMLINKS))
1820	1675	return -ELOOP;
1821		- }
1822		- if (!(nd->flags & LOOKUP_RCU)) {
1823		- if (link->mnt == nd->path.mnt)
1824		- mntget(link->mnt);
1825		- }
1826		- error = nd_alloc_stack(nd);
1827		- if (unlikely(error)) {
1828		- if (error == -ECHILD) {
1829		- if (unlikely(!legitimize_path(nd, link, seq))) {
1830		- drop_links(nd);
1831		- nd->depth = 0;
1832		- nd->flags &= ~LOOKUP_RCU;
1833		- nd->path.mnt = NULL;
1834		- nd->path.dentry = NULL;
1835		- if (!(nd->flags & LOOKUP_ROOT))
1836		- nd->root.mnt = NULL;
1837		- rcu_read_unlock();
1838		- } else if (likely(unlazy_walk(nd)) == 0)
1839		- error = nd_alloc_stack(nd);
1840		- }
1841		- if (error) {
1842		- path_put(link);
1843		- return error;
1844		- }
1845		- }
1846	1676
	1677	+ if (likely(nd->depth != EMBEDDED_LEVELS))
	1678	+ return 0;
	1679	+ if (likely(nd->stack != nd->internal))
	1680	+ return 0;
	1681	+ if (likely(nd_alloc_stack(nd)))
	1682	+ return 0;
	1683	+
	1684	+ if (nd->flags & LOOKUP_RCU) {
	1685	+ // we need to grab link before we do unlazy. And we can't skip
	1686	+ // unlazy even if we fail to grab the link - cleanup needs it
	1687	+ bool grabbed_link = legitimize_path(nd, link, seq);
	1688	+
	1689	+ if (!try_to_unlazy(nd) != 0 \|\| !grabbed_link)
	1690	+ return -ECHILD;
	1691	+
	1692	+ if (nd_alloc_stack(nd))
	1693	+ return 0;
	1694	+ }
	1695	+ return -ENOMEM;
	1696	+}
	1697	+
	1698	+enum {WALK_TRAILING = 1, WALK_MORE = 2, WALK_NOFOLLOW = 4};
	1699	+
	1700	+static const char *pick_link(struct nameidata *nd, struct path *link,
	1701	+ struct inode *inode, unsigned seq, int flags)
	1702	+{
	1703	+ struct saved *last;
	1704	+ const char *res;
	1705	+ int error = reserve_stack(nd, link, seq);
	1706	+
	1707	+ if (unlikely(error)) {
	1708	+ if (!(nd->flags & LOOKUP_RCU))
	1709	+ path_put(link);
	1710	+ return ERR_PTR(error);
	1711	+ }
1847	1712	last = nd->stack + nd->depth++;
1848	1713	last->link = *link;
1849	1714	clear_delayed_call(&last->done);
1850		- nd->link_inode = inode;
1851	1715	last->seq = seq;
1852		- return 1;
1853		-}
1854	1716
1855		-enum {WALK_FOLLOW = 1, WALK_MORE = 2};
	1717	+ if (flags & WALK_TRAILING) {
	1718	+ error = may_follow_link(nd, inode);
	1719	+ if (unlikely(error))
	1720	+ return ERR_PTR(error);
	1721	+ }
	1722	+
	1723	+ if (unlikely(nd->flags & LOOKUP_NO_SYMLINKS) \|\|
	1724	+ unlikely(link->mnt->mnt_flags & MNT_NOSYMFOLLOW))
	1725	+ return ERR_PTR(-ELOOP);
	1726	+
	1727	+ if (!(nd->flags & LOOKUP_RCU)) {
	1728	+ touch_atime(&last->link);
	1729	+ cond_resched();
	1730	+ } else if (atime_needs_update(&last->link, inode)) {
	1731	+ if (!try_to_unlazy(nd))
	1732	+ return ERR_PTR(-ECHILD);
	1733	+ touch_atime(&last->link);
	1734	+ }
	1735	+
	1736	+ error = security_inode_follow_link(link->dentry, inode,
	1737	+ nd->flags & LOOKUP_RCU);
	1738	+ if (unlikely(error))
	1739	+ return ERR_PTR(error);
	1740	+
	1741	+ res = READ_ONCE(inode->i_link);
	1742	+ if (!res) {
	1743	+ const char * (*get)(struct dentry *, struct inode *,
	1744	+ struct delayed_call *);
	1745	+ get = inode->i_op->get_link;
	1746	+ if (nd->flags & LOOKUP_RCU) {
	1747	+ res = get(NULL, inode, &last->done);
	1748	+ if (res == ERR_PTR(-ECHILD) && try_to_unlazy(nd))
	1749	+ res = get(link->dentry, inode, &last->done);
	1750	+ } else {
	1751	+ res = get(link->dentry, inode, &last->done);
	1752	+ }
	1753	+ if (!res)
	1754	+ goto all_done;
	1755	+ if (IS_ERR(res))
	1756	+ return res;
	1757	+ }
	1758	+ if (*res == '/') {
	1759	+ error = nd_jump_root(nd);
	1760	+ if (unlikely(error))
	1761	+ return ERR_PTR(error);
	1762	+ while (unlikely(*++res == '/'))
	1763	+ ;
	1764	+ }
	1765	+ if (*res)
	1766	+ return res;
	1767	+all_done: // pure jump
	1768	+ put_link(nd);
	1769	+ return NULL;
	1770	+}
1856	1771
1857	1772	/*
1858	1773	* Do we need to follow links? We _really_ want to be able
..	..	@@ -1860,68 +1775,187 @@
1860	1775	* so we keep a cache of "no, this doesn't need follow_link"
1861	1776	* for the common case.
1862	1777	*/
1863		-static inline int step_into(struct nameidata *nd, struct path *path,
1864		- int flags, struct inode *inode, unsigned seq)
1865		-{
1866		- if (!(flags & WALK_MORE) && nd->depth)
1867		- put_link(nd);
1868		- if (likely(!d_is_symlink(path->dentry)) \|\|
1869		- !(flags & WALK_FOLLOW \|\| nd->flags & LOOKUP_FOLLOW)) {
1870		- /* not a symlink or should not follow */
1871		- path_to_nameidata(path, nd);
1872		- nd->inode = inode;
1873		- nd->seq = seq;
1874		- return 0;
1875		- }
1876		- /* make sure that d_is_symlink above matches inode */
1877		- if (nd->flags & LOOKUP_RCU) {
1878		- if (read_seqcount_retry(&path->dentry->d_seq, seq))
1879		- return -ECHILD;
1880		- }
1881		- return pick_link(nd, path, inode, seq);
1882		-}
1883		-
1884		-static int walk_component(struct nameidata *nd, int flags)
	1778	+static const char *step_into(struct nameidata *nd, int flags,
	1779	+ struct dentry *dentry, struct inode *inode, unsigned seq)
1885	1780	{
1886	1781	struct path path;
	1782	+ int err = handle_mounts(nd, dentry, &path, &inode, &seq);
	1783	+
	1784	+ if (err < 0)
	1785	+ return ERR_PTR(err);
	1786	+ if (likely(!d_is_symlink(path.dentry)) \|\|
	1787	+ ((flags & WALK_TRAILING) && !(nd->flags & LOOKUP_FOLLOW)) \|\|
	1788	+ (flags & WALK_NOFOLLOW)) {
	1789	+ /* not a symlink or should not follow */
	1790	+ if (!(nd->flags & LOOKUP_RCU)) {
	1791	+ dput(nd->path.dentry);
	1792	+ if (nd->path.mnt != path.mnt)
	1793	+ mntput(nd->path.mnt);
	1794	+ }
	1795	+ nd->path = path;
	1796	+ nd->inode = inode;
	1797	+ nd->seq = seq;
	1798	+ return NULL;
	1799	+ }
	1800	+ if (nd->flags & LOOKUP_RCU) {
	1801	+ /* make sure that d_is_symlink above matches inode */
	1802	+ if (read_seqcount_retry(&path.dentry->d_seq, seq))
	1803	+ return ERR_PTR(-ECHILD);
	1804	+ } else {
	1805	+ if (path.mnt == nd->path.mnt)
	1806	+ mntget(path.mnt);
	1807	+ }
	1808	+ return pick_link(nd, &path, inode, seq, flags);
	1809	+}
	1810	+
	1811	+static struct dentry *follow_dotdot_rcu(struct nameidata *nd,
	1812	+ struct inode **inodep,
	1813	+ unsigned *seqp)
	1814	+{
	1815	+ struct dentry *parent, *old;
	1816	+
	1817	+ if (path_equal(&nd->path, &nd->root))
	1818	+ goto in_root;
	1819	+ if (unlikely(nd->path.dentry == nd->path.mnt->mnt_root)) {
	1820	+ struct path path;
	1821	+ unsigned seq;
	1822	+ if (!choose_mountpoint_rcu(real_mount(nd->path.mnt),
	1823	+ &nd->root, &path, &seq))
	1824	+ goto in_root;
	1825	+ if (unlikely(nd->flags & LOOKUP_NO_XDEV))
	1826	+ return ERR_PTR(-ECHILD);
	1827	+ nd->path = path;
	1828	+ nd->inode = path.dentry->d_inode;
	1829	+ nd->seq = seq;
	1830	+ if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
	1831	+ return ERR_PTR(-ECHILD);
	1832	+ /* we know that mountpoint was pinned */
	1833	+ }
	1834	+ old = nd->path.dentry;
	1835	+ parent = old->d_parent;
	1836	+ *inodep = parent->d_inode;
	1837	+ *seqp = read_seqcount_begin(&parent->d_seq);
	1838	+ if (unlikely(read_seqcount_retry(&old->d_seq, nd->seq)))
	1839	+ return ERR_PTR(-ECHILD);
	1840	+ if (unlikely(!path_connected(nd->path.mnt, parent)))
	1841	+ return ERR_PTR(-ECHILD);
	1842	+ return parent;
	1843	+in_root:
	1844	+ if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
	1845	+ return ERR_PTR(-ECHILD);
	1846	+ if (unlikely(nd->flags & LOOKUP_BENEATH))
	1847	+ return ERR_PTR(-ECHILD);
	1848	+ return NULL;
	1849	+}
	1850	+
	1851	+static struct dentry *follow_dotdot(struct nameidata *nd,
	1852	+ struct inode **inodep,
	1853	+ unsigned *seqp)
	1854	+{
	1855	+ struct dentry *parent;
	1856	+
	1857	+ if (path_equal(&nd->path, &nd->root))
	1858	+ goto in_root;
	1859	+ if (unlikely(nd->path.dentry == nd->path.mnt->mnt_root)) {
	1860	+ struct path path;
	1861	+
	1862	+ if (!choose_mountpoint(real_mount(nd->path.mnt),
	1863	+ &nd->root, &path))
	1864	+ goto in_root;
	1865	+ path_put(&nd->path);
	1866	+ nd->path = path;
	1867	+ nd->inode = path.dentry->d_inode;
	1868	+ if (unlikely(nd->flags & LOOKUP_NO_XDEV))
	1869	+ return ERR_PTR(-EXDEV);
	1870	+ }
	1871	+ /* rare case of legitimate dget_parent()... */
	1872	+ parent = dget_parent(nd->path.dentry);
	1873	+ if (unlikely(!path_connected(nd->path.mnt, parent))) {
	1874	+ dput(parent);
	1875	+ return ERR_PTR(-ENOENT);
	1876	+ }
	1877	+ *seqp = 0;
	1878	+ *inodep = parent->d_inode;
	1879	+ return parent;
	1880	+
	1881	+in_root:
	1882	+ if (unlikely(nd->flags & LOOKUP_BENEATH))
	1883	+ return ERR_PTR(-EXDEV);
	1884	+ dget(nd->path.dentry);
	1885	+ return NULL;
	1886	+}
	1887	+
	1888	+static const char *handle_dots(struct nameidata *nd, int type)
	1889	+{
	1890	+ if (type == LAST_DOTDOT) {
	1891	+ const char *error = NULL;
	1892	+ struct dentry *parent;
	1893	+ struct inode *inode;
	1894	+ unsigned seq;
	1895	+
	1896	+ if (!nd->root.mnt) {
	1897	+ error = ERR_PTR(set_root(nd));
	1898	+ if (error)
	1899	+ return error;
	1900	+ }
	1901	+ if (nd->flags & LOOKUP_RCU)
	1902	+ parent = follow_dotdot_rcu(nd, &inode, &seq);
	1903	+ else
	1904	+ parent = follow_dotdot(nd, &inode, &seq);
	1905	+ if (IS_ERR(parent))
	1906	+ return ERR_CAST(parent);
	1907	+ if (unlikely(!parent))
	1908	+ error = step_into(nd, WALK_NOFOLLOW,
	1909	+ nd->path.dentry, nd->inode, nd->seq);
	1910	+ else
	1911	+ error = step_into(nd, WALK_NOFOLLOW,
	1912	+ parent, inode, seq);
	1913	+ if (unlikely(error))
	1914	+ return error;
	1915	+
	1916	+ if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) {
	1917	+ /*
	1918	+ * If there was a racing rename or mount along our
	1919	+ * path, then we can't be sure that ".." hasn't jumped
	1920	+ * above nd->root (and so userspace should retry or use
	1921	+ * some fallback).
	1922	+ */
	1923	+ smp_rmb();
	1924	+ if (unlikely(__read_seqcount_retry(&mount_lock.seqcount, nd->m_seq)))
	1925	+ return ERR_PTR(-EAGAIN);
	1926	+ if (unlikely(__read_seqcount_retry(&rename_lock.seqcount, nd->r_seq)))
	1927	+ return ERR_PTR(-EAGAIN);
	1928	+ }
	1929	+ }
	1930	+ return NULL;
	1931	+}
	1932	+
	1933	+static const char *walk_component(struct nameidata *nd, int flags)
	1934	+{
	1935	+ struct dentry *dentry;
1887	1936	struct inode *inode;
1888	1937	unsigned seq;
1889		- int err;
1890	1938	/*
1891	1939	* "." and ".." are special - ".." especially so because it has
1892	1940	* to be able to know about the current root directory and
1893	1941	* parent relationships.
1894	1942	*/
1895	1943	if (unlikely(nd->last_type != LAST_NORM)) {
1896		- err = handle_dots(nd, nd->last_type);
1897	1944	if (!(flags & WALK_MORE) && nd->depth)
1898	1945	put_link(nd);
1899		- return err;
	1946	+ return handle_dots(nd, nd->last_type);
1900	1947	}
1901		- err = lookup_fast(nd, &path, &inode, &seq);
1902		- if (unlikely(err <= 0)) {
1903		- if (err < 0)
1904		- return err;
1905		- path.dentry = lookup_slow(&nd->last, nd->path.dentry,
1906		- nd->flags);
1907		- if (IS_ERR(path.dentry))
1908		- return PTR_ERR(path.dentry);
1909		-
1910		- path.mnt = nd->path.mnt;
1911		- err = follow_managed(&path, nd);
1912		- if (unlikely(err < 0))
1913		- return err;
1914		-
1915		- if (unlikely(d_is_negative(path.dentry))) {
1916		- path_to_nameidata(&path, nd);
1917		- return -ENOENT;
1918		- }
1919		-
1920		- seq = 0; /* we are already out of RCU mode */
1921		- inode = d_backing_inode(path.dentry);
	1948	+ dentry = lookup_fast(nd, &inode, &seq);
	1949	+ if (IS_ERR(dentry))
	1950	+ return ERR_CAST(dentry);
	1951	+ if (unlikely(!dentry)) {
	1952	+ dentry = lookup_slow(&nd->last, nd->path.dentry, nd->flags);
	1953	+ if (IS_ERR(dentry))
	1954	+ return ERR_CAST(dentry);
1922	1955	}
1923		-
1924		- return step_into(nd, &path, flags, inode, seq);
	1956	+ if (!(flags & WALK_MORE) && nd->depth)
	1957	+ put_link(nd);
	1958	+ return step_into(nd, flags, dentry, inode, seq);
1925	1959	}
1926	1960
1927	1961	/*
..	..	@@ -2162,8 +2196,11 @@
2162	2196	*/
2163	2197	static int link_path_walk(const char *name, struct nameidata *nd)
2164	2198	{
	2199	+ int depth = 0; // depth <= nd->depth
2165	2200	int err;
2166	2201
	2202	+ nd->last_type = LAST_ROOT;
	2203	+ nd->flags \|= LOOKUP_PARENT;
2167	2204	if (IS_ERR(name))
2168	2205	return PTR_ERR(name);
2169	2206	while (*name=='/')
..	..	@@ -2173,6 +2210,7 @@
2173	2210
2174	2211	/* At this point we know we have a real path component. */
2175	2212	for(;;) {
	2213	+ const char *link;
2176	2214	u64 hash_len;
2177	2215	int type;
2178	2216
..	..	@@ -2222,40 +2260,31 @@
2222	2260	} while (unlikely(*name == '/'));
2223	2261	if (unlikely(!*name)) {
2224	2262	OK:
2225		- /* pathname body, done */
2226		- if (!nd->depth)
	2263	+ /* pathname or trailing symlink, done */
	2264	+ if (!depth) {
	2265	+ nd->dir_uid = nd->inode->i_uid;
	2266	+ nd->dir_mode = nd->inode->i_mode;
	2267	+ nd->flags &= ~LOOKUP_PARENT;
2227	2268	return 0;
2228		- name = nd->stack[nd->depth - 1].name;
2229		- /* trailing symlink, done */
2230		- if (!name)
2231		- return 0;
	2269	+ }
2232	2270	/* last component of nested symlink */
2233		- err = walk_component(nd, WALK_FOLLOW);
	2271	+ name = nd->stack[--depth].name;
	2272	+ link = walk_component(nd, 0);
2234	2273	} else {
2235	2274	/* not the last component */
2236		- err = walk_component(nd, WALK_FOLLOW \| WALK_MORE);
	2275	+ link = walk_component(nd, WALK_MORE);
2237	2276	}
2238		- if (err < 0)
2239		- return err;
2240		-
2241		- if (err) {
2242		- const char *s = get_link(nd);
2243		-
2244		- if (IS_ERR(s))
2245		- return PTR_ERR(s);
2246		- err = 0;
2247		- if (unlikely(!s)) {
2248		- /* jumped */
2249		- put_link(nd);
2250		- } else {
2251		- nd->stack[nd->depth - 1].name = name;
2252		- name = s;
2253		- continue;
2254		- }
	2277	+ if (unlikely(link)) {
	2278	+ if (IS_ERR(link))
	2279	+ return PTR_ERR(link);
	2280	+ /* a symlink to follow */
	2281	+ nd->stack[depth++].name = name;
	2282	+ name = link;
	2283	+ continue;
2255	2284	}
2256	2285	if (unlikely(!d_can_lookup(nd->path.dentry))) {
2257	2286	if (nd->flags & LOOKUP_RCU) {
2258		- if (unlazy_walk(nd))
	2287	+ if (!try_to_unlazy(nd))
2259	2288	return -ECHILD;
2260	2289	}
2261	2290	return -ENOTDIR;
..	..	@@ -2266,16 +2295,25 @@
2266	2295	/* must be paired with terminate_walk() */
2267	2296	static const char *path_init(struct nameidata *nd, unsigned flags)
2268	2297	{
	2298	+ int error;
2269	2299	const char *s = nd->name->name;
	2300	+
	2301	+ /* LOOKUP_CACHED requires RCU, ask caller to retry */
	2302	+ if ((flags & (LOOKUP_RCU \| LOOKUP_CACHED)) == LOOKUP_CACHED)
	2303	+ return ERR_PTR(-EAGAIN);
2270	2304
2271	2305	if (!*s)
2272	2306	flags &= ~LOOKUP_RCU;
2273	2307	if (flags & LOOKUP_RCU)
2274	2308	rcu_read_lock();
2275	2309
2276		- nd->last_type = LAST_ROOT; /* if there are only slashes... */
2277		- nd->flags = flags \| LOOKUP_JUMPED \| LOOKUP_PARENT;
	2310	+ nd->flags = flags \| LOOKUP_JUMPED;
2278	2311	nd->depth = 0;
	2312	+
	2313	+ nd->m_seq = __read_seqcount_begin(&mount_lock.seqcount);
	2314	+ nd->r_seq = __read_seqcount_begin(&rename_lock.seqcount);
	2315	+ smp_rmb();
	2316	+
2279	2317	if (flags & LOOKUP_ROOT) {
2280	2318	struct dentry *root = nd->root.dentry;
2281	2319	struct inode *inode = root->d_inode;
..	..	@@ -2284,9 +2322,8 @@
2284	2322	nd->path = nd->root;
2285	2323	nd->inode = inode;
2286	2324	if (flags & LOOKUP_RCU) {
2287		- nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
	2325	+ nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
2288	2326	nd->root_seq = nd->seq;
2289		- nd->m_seq = read_seqbegin(&mount_lock);
2290	2327	} else {
2291	2328	path_get(&nd->path);
2292	2329	}
..	..	@@ -2294,16 +2331,17 @@
2294	2331	}
2295	2332
2296	2333	nd->root.mnt = NULL;
2297		- nd->path.mnt = NULL;
2298		- nd->path.dentry = NULL;
2299	2334
2300		- nd->m_seq = read_seqbegin(&mount_lock);
2301		- if (*s == '/') {
2302		- set_root(nd);
2303		- if (likely(!nd_jump_root(nd)))
2304		- return s;
2305		- return ERR_PTR(-ECHILD);
2306		- } else if (nd->dfd == AT_FDCWD) {
	2335	+ /* Absolute pathname -- fetch the root (LOOKUP_IN_ROOT uses nd->dfd). */
	2336	+ if (*s == '/' && !(flags & LOOKUP_IN_ROOT)) {
	2337	+ error = nd_jump_root(nd);
	2338	+ if (unlikely(error))
	2339	+ return ERR_PTR(error);
	2340	+ return s;
	2341	+ }
	2342	+
	2343	+ /* Relative pathname -- get the starting-point it is relative to. */
	2344	+ if (nd->dfd == AT_FDCWD) {
2307	2345	if (flags & LOOKUP_RCU) {
2308	2346	struct fs_struct *fs = current->fs;
2309	2347	unsigned seq;
..	..	@@ -2318,7 +2356,6 @@
2318	2356	get_fs_pwd(current->fs, &nd->path);
2319	2357	nd->inode = nd->path.dentry->d_inode;
2320	2358	}
2321		- return s;
2322	2359	} else {
2323	2360	/* Caller must check execute permissions on the starting path component */
2324	2361	struct fd f = fdget_raw(nd->dfd);
..	..	@@ -2343,58 +2380,35 @@
2343	2380	nd->inode = nd->path.dentry->d_inode;
2344	2381	}
2345	2382	fdput(f);
2346		- return s;
2347	2383	}
	2384	+
	2385	+ /* For scoped-lookups we need to set the root to the dirfd as well. */
	2386	+ if (flags & LOOKUP_IS_SCOPED) {
	2387	+ nd->root = nd->path;
	2388	+ if (flags & LOOKUP_RCU) {
	2389	+ nd->root_seq = nd->seq;
	2390	+ } else {
	2391	+ path_get(&nd->root);
	2392	+ nd->flags \|= LOOKUP_ROOT_GRABBED;
	2393	+ }
	2394	+ }
	2395	+ return s;
2348	2396	}
2349	2397
2350		-static const char *trailing_symlink(struct nameidata *nd)
2351		-{
2352		- const char *s;
2353		- int error = may_follow_link(nd);
2354		- if (unlikely(error))
2355		- return ERR_PTR(error);
2356		- nd->flags \|= LOOKUP_PARENT;
2357		- nd->stack[0].name = NULL;
2358		- s = get_link(nd);
2359		- return s ? s : "";
2360		-}
2361		-
2362		-static inline int lookup_last(struct nameidata *nd)
	2398	+static inline const char lookup_last(struct nameidata nd)
2363	2399	{
2364	2400	if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
2365	2401	nd->flags \|= LOOKUP_FOLLOW \| LOOKUP_DIRECTORY;
2366	2402
2367		- nd->flags &= ~LOOKUP_PARENT;
2368		- return walk_component(nd, 0);
	2403	+ return walk_component(nd, WALK_TRAILING);
2369	2404	}
2370	2405
2371	2406	static int handle_lookup_down(struct nameidata *nd)
2372	2407	{
2373		- struct path path = nd->path;
2374		- struct inode *inode = nd->inode;
2375		- unsigned seq = nd->seq;
2376		- int err;
2377		-
2378		- if (nd->flags & LOOKUP_RCU) {
2379		- /*
2380		- * don't bother with unlazy_walk on failure - we are
2381		- * at the very beginning of walk, so we lose nothing
2382		- * if we simply redo everything in non-RCU mode
2383		- */
2384		- if (unlikely(!__follow_mount_rcu(nd, &path, &inode, &seq)))
2385		- return -ECHILD;
2386		- } else {
2387		- dget(path.dentry);
2388		- err = follow_managed(&path, nd);
2389		- if (unlikely(err < 0))
2390		- return err;
2391		- inode = d_backing_inode(path.dentry);
2392		- seq = 0;
2393		- }
2394		- path_to_nameidata(&path, nd);
2395		- nd->inode = inode;
2396		- nd->seq = seq;
2397		- return 0;
	2408	+ if (!(nd->flags & LOOKUP_RCU))
	2409	+ dget(nd->path.dentry);
	2410	+ return PTR_ERR(step_into(nd, WALK_NOFOLLOW,
	2411	+ nd->path.dentry, nd->inode, nd->seq));
2398	2412	}
2399	2413
2400	2414	/* Returns 0 and nd will be valid on success; Returns error, otherwise. */
..	..	@@ -2409,9 +2423,12 @@
2409	2423	s = ERR_PTR(err);
2410	2424	}
2411	2425
2412		- while (!(err = link_path_walk(s, nd))
2413		- && ((err = lookup_last(nd)) > 0)) {
2414		- s = trailing_symlink(nd);
	2426	+ while (!(err = link_path_walk(s, nd)) &&
	2427	+ (s = lookup_last(nd)) != NULL)
	2428	+ ;
	2429	+ if (!err && unlikely(nd->flags & LOOKUP_MOUNTPOINT)) {
	2430	+ err = handle_lookup_down(nd);
	2431	+ nd->flags &= ~LOOKUP_JUMPED; // no d_weak_revalidate(), please...
2415	2432	}
2416	2433	if (!err)
2417	2434	err = complete_walk(nd);
..	..	@@ -2428,8 +2445,8 @@
2428	2445	return err;
2429	2446	}
2430	2447
2431		-static int filename_lookup(int dfd, struct filename *name, unsigned flags,
2432		- struct path path, struct path root)
	2448	+int filename_lookup(int dfd, struct filename *name, unsigned flags,
	2449	+ struct path path, struct path root)
2433	2450	{
2434	2451	int retval;
2435	2452	struct nameidata nd;
..	..	@@ -2447,7 +2464,8 @@
2447	2464	retval = path_lookupat(&nd, flags \| LOOKUP_REVAL, path);
2448	2465
2449	2466	if (likely(!retval))
2450		- audit_inode(name, path->dentry, flags & LOOKUP_PARENT);
	2467	+ audit_inode(name, path->dentry,
	2468	+ flags & LOOKUP_MOUNTPOINT ? AUDIT_INODE_NOEVAL : 0);
2451	2469	restore_nameidata();
2452	2470	putname(name);
2453	2471	return retval;
..	..	@@ -2488,7 +2506,7 @@
2488	2506	if (likely(!retval)) {
2489	2507	*last = nd.last;
2490	2508	*type = nd.last_type;
2491		- audit_inode(name, parent->dentry, LOOKUP_PARENT);
	2509	+ audit_inode(name, parent->dentry, AUDIT_INODE_PARENT);
2492	2510	} else {
2493	2511	putname(name);
2494	2512	name = ERR_PTR(retval);
..	..	@@ -2550,8 +2568,8 @@
2550	2568	}
2551	2569	EXPORT_SYMBOL(vfs_path_lookup);
2552	2570
2553		-static int lookup_one_len_common(const char name, struct vfsmount mnt,
2554		- struct dentry base, int len, struct qstr this)
	2571	+static int lookup_one_len_common(const char name, struct dentry base,
	2572	+ int len, struct qstr *this)
2555	2573	{
2556	2574	this->name = name;
2557	2575	this->len = len;
..	..	@@ -2579,7 +2597,7 @@
2579	2597	return err;
2580	2598	}
2581	2599
2582		- return inode_permission2(mnt, base->d_inode, MAY_EXEC);
	2600	+ return inode_permission(base->d_inode, MAY_EXEC);
2583	2601	}
2584	2602
2585	2603	/**
..	..	@@ -2603,7 +2621,7 @@
2603	2621
2604	2622	WARN_ON_ONCE(!inode_is_locked(base->d_inode));
2605	2623
2606		- err = lookup_one_len_common(name, NULL, base, len, &this);
	2624	+ err = lookup_one_len_common(name, base, len, &this);
2607	2625	if (err)
2608	2626	return ERR_PTR(err);
2609	2627
..	..	@@ -2622,7 +2640,7 @@
2622	2640	*
2623	2641	* The caller must hold base->i_mutex.
2624	2642	*/
2625		-struct dentry lookup_one_len2(const char name, struct vfsmount mnt, struct dentry base, int len)
	2643	+struct dentry lookup_one_len(const char name, struct dentry *base, int len)
2626	2644	{
2627	2645	struct dentry *dentry;
2628	2646	struct qstr this;
..	..	@@ -2630,18 +2648,12 @@
2630	2648
2631	2649	WARN_ON_ONCE(!inode_is_locked(base->d_inode));
2632	2650
2633		- err = lookup_one_len_common(name, mnt, base, len, &this);
	2651	+ err = lookup_one_len_common(name, base, len, &this);
2634	2652	if (err)
2635	2653	return ERR_PTR(err);
2636	2654
2637	2655	dentry = lookup_dcache(&this, base, 0);
2638	2656	return dentry ? dentry : __lookup_slow(&this, base, 0);
2639		-}
2640		-EXPORT_SYMBOL(lookup_one_len2);
2641		-
2642		-struct dentry lookup_one_len(const char name, struct dentry *base, int len)
2643		-{
2644		- return lookup_one_len2(name, NULL, base, len);
2645	2657	}
2646	2658	EXPORT_SYMBOL(lookup_one_len);
2647	2659
..	..	@@ -2664,7 +2676,7 @@
2664	2676	int err;
2665	2677	struct dentry *ret;
2666	2678
2667		- err = lookup_one_len_common(name, NULL, base, len, &this);
	2679	+ err = lookup_one_len_common(name, base, len, &this);
2668	2680	if (err)
2669	2681	return ERR_PTR(err);
2670	2682
..	..	@@ -2675,30 +2687,49 @@
2675	2687	}
2676	2688	EXPORT_SYMBOL(lookup_one_len_unlocked);
2677	2689
	2690	+/*
	2691	+ * Like lookup_one_len_unlocked(), except that it yields ERR_PTR(-ENOENT)
	2692	+ * on negatives. Returns known positive or ERR_PTR(); that's what
	2693	+ * most of the users want. Note that pinned negative with unlocked parent
	2694	+ * _can_ become positive at any time, so callers of lookup_one_len_unlocked()
	2695	+ * need to be very careful; pinned positives have ->d_inode stable, so
	2696	+ * this one avoids such problems.
	2697	+ */
	2698	+struct dentry lookup_positive_unlocked(const char name,
	2699	+ struct dentry *base, int len)
	2700	+{
	2701	+ struct dentry *ret = lookup_one_len_unlocked(name, base, len);
	2702	+ if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
	2703	+ dput(ret);
	2704	+ ret = ERR_PTR(-ENOENT);
	2705	+ }
	2706	+ return ret;
	2707	+}
	2708	+EXPORT_SYMBOL(lookup_positive_unlocked);
	2709	+
2678	2710	#ifdef CONFIG_UNIX98_PTYS
2679	2711	int path_pts(struct path *path)
2680	2712	{
2681	2713	/* Find something mounted on "pts" in the same directory as
2682	2714	* the input path.
2683	2715	*/
2684		- struct dentry child, parent;
2685		- struct qstr this;
2686		- int ret;
	2716	+ struct dentry *parent = dget_parent(path->dentry);
	2717	+ struct dentry *child;
	2718	+ struct qstr this = QSTR_INIT("pts", 3);
2687	2719
2688		- ret = path_parent_directory(path);
2689		- if (ret)
2690		- return ret;
2691		-
2692		- parent = path->dentry;
2693		- this.name = "pts";
2694		- this.len = 3;
	2720	+ if (unlikely(!path_connected(path->mnt, parent))) {
	2721	+ dput(parent);
	2722	+ return -ENOENT;
	2723	+ }
	2724	+ dput(path->dentry);
	2725	+ path->dentry = parent;
2695	2726	child = d_hash_and_lookup(parent, &this);
2696		- if (!child)
	2727	+ if (IS_ERR_OR_NULL(child))
2697	2728	return -ENOENT;
2698	2729
2699	2730	path->dentry = child;
2700	2731	dput(parent);
2701		- follow_mount(path);
	2732	+ follow_down(path);
2702	2733	return 0;
2703	2734	}
2704	2735	#endif
..	..	@@ -2710,151 +2741,6 @@
2710	2741	flags, path, NULL);
2711	2742	}
2712	2743	EXPORT_SYMBOL(user_path_at_empty);
2713		-
2714		-/**
2715		- * mountpoint_last - look up last component for umount
2716		- * @nd: pathwalk nameidata - currently pointing at parent directory of "last"
2717		- *
2718		- * This is a special lookup_last function just for umount. In this case, we
2719		- * need to resolve the path without doing any revalidation.
2720		- *
2721		- * The nameidata should be the result of doing a LOOKUP_PARENT pathwalk. Since
2722		- * mountpoints are always pinned in the dcache, their ancestors are too. Thus,
2723		- * in almost all cases, this lookup will be served out of the dcache. The only
2724		- * cases where it won't are if nd->last refers to a symlink or the path is
2725		- * bogus and it doesn't exist.
2726		- *
2727		- * Returns:
2728		- * -error: if there was an error during lookup. This includes -ENOENT if the
2729		- * lookup found a negative dentry.
2730		- *
2731		- * 0: if we successfully resolved nd->last and found it to not to be a
2732		- * symlink that needs to be followed.
2733		- *
2734		- * 1: if we successfully resolved nd->last and found it to be a symlink
2735		- * that needs to be followed.
2736		- */
2737		-static int
2738		-mountpoint_last(struct nameidata *nd)
2739		-{
2740		- int error = 0;
2741		- struct dentry *dir = nd->path.dentry;
2742		- struct path path;
2743		-
2744		- /* If we're in rcuwalk, drop out of it to handle last component */
2745		- if (nd->flags & LOOKUP_RCU) {
2746		- if (unlazy_walk(nd))
2747		- return -ECHILD;
2748		- }
2749		-
2750		- nd->flags &= ~LOOKUP_PARENT;
2751		-
2752		- if (unlikely(nd->last_type != LAST_NORM)) {
2753		- error = handle_dots(nd, nd->last_type);
2754		- if (error)
2755		- return error;
2756		- path.dentry = dget(nd->path.dentry);
2757		- } else {
2758		- path.dentry = d_lookup(dir, &nd->last);
2759		- if (!path.dentry) {
2760		- /*
2761		- * No cached dentry. Mounted dentries are pinned in the
2762		- * cache, so that means that this dentry is probably
2763		- * a symlink or the path doesn't actually point
2764		- * to a mounted dentry.
2765		- */
2766		- path.dentry = lookup_slow(&nd->last, dir,
2767		- nd->flags \| LOOKUP_NO_REVAL);
2768		- if (IS_ERR(path.dentry))
2769		- return PTR_ERR(path.dentry);
2770		- }
2771		- }
2772		- if (d_is_negative(path.dentry)) {
2773		- dput(path.dentry);
2774		- return -ENOENT;
2775		- }
2776		- path.mnt = nd->path.mnt;
2777		- return step_into(nd, &path, 0, d_backing_inode(path.dentry), 0);
2778		-}
2779		-
2780		-/**
2781		- * path_mountpoint - look up a path to be umounted
2782		- * @nd: lookup context
2783		- * @flags: lookup flags
2784		- * @path: pointer to container for result
2785		- *
2786		- * Look up the given name, but don't attempt to revalidate the last component.
2787		- * Returns 0 and "path" will be valid on success; Returns error otherwise.
2788		- */
2789		-static int
2790		-path_mountpoint(struct nameidata nd, unsigned flags, struct path path)
2791		-{
2792		- const char *s = path_init(nd, flags);
2793		- int err;
2794		-
2795		- while (!(err = link_path_walk(s, nd)) &&
2796		- (err = mountpoint_last(nd)) > 0) {
2797		- s = trailing_symlink(nd);
2798		- }
2799		- if (!err) {
2800		- *path = nd->path;
2801		- nd->path.mnt = NULL;
2802		- nd->path.dentry = NULL;
2803		- follow_mount(path);
2804		- }
2805		- terminate_walk(nd);
2806		- return err;
2807		-}
2808		-
2809		-static int
2810		-filename_mountpoint(int dfd, struct filename name, struct path path,
2811		- unsigned int flags)
2812		-{
2813		- struct nameidata nd;
2814		- int error;
2815		- if (IS_ERR(name))
2816		- return PTR_ERR(name);
2817		- set_nameidata(&nd, dfd, name);
2818		- error = path_mountpoint(&nd, flags \| LOOKUP_RCU, path);
2819		- if (unlikely(error == -ECHILD))
2820		- error = path_mountpoint(&nd, flags, path);
2821		- if (unlikely(error == -ESTALE))
2822		- error = path_mountpoint(&nd, flags \| LOOKUP_REVAL, path);
2823		- if (likely(!error))
2824		- audit_inode(name, path->dentry, 0);
2825		- restore_nameidata();
2826		- putname(name);
2827		- return error;
2828		-}
2829		-
2830		-/**
2831		- * user_path_mountpoint_at - lookup a path from userland in order to umount it
2832		- * @dfd: directory file descriptor
2833		- * @name: pathname from userland
2834		- * @flags: lookup flags
2835		- * @path: pointer to container to hold result
2836		- *
2837		- * A umount is a special case for path walking. We're not actually interested
2838		- * in the inode in this situation, and ESTALE errors can be a problem. We
2839		- * simply want track down the dentry and vfsmount attached at the mountpoint
2840		- * and avoid revalidating the last component.
2841		- *
2842		- * Returns 0 and populates "path" on success.
2843		- */
2844		-int
2845		-user_path_mountpoint_at(int dfd, const char __user *name, unsigned int flags,
2846		- struct path *path)
2847		-{
2848		- return filename_mountpoint(dfd, getname(name), path, flags);
2849		-}
2850		-
2851		-int
2852		-kern_path_mountpoint(int dfd, const char name, struct path path,
2853		- unsigned int flags)
2854		-{
2855		- return filename_mountpoint(dfd, getname_kernel(name), path, flags);
2856		-}
2857		-EXPORT_SYMBOL(kern_path_mountpoint);
2858	2744
2859	2745	int __check_sticky(struct inode dir, struct inode inode)
2860	2746	{
..	..	@@ -2888,7 +2774,7 @@
2888	2774	* 11. We don't allow removal of NFS sillyrenamed files; it's handled by
2889	2775	* nfs_async_unlink().
2890	2776	*/
2891		-static int may_delete(struct vfsmount mnt, struct inode dir, struct dentry *victim, bool isdir)
	2777	+static int may_delete(struct inode dir, struct dentry victim, bool isdir)
2892	2778	{
2893	2779	struct inode *inode = d_backing_inode(victim);
2894	2780	int error;
..	..	@@ -2905,7 +2791,7 @@
2905	2791
2906	2792	audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
2907	2793
2908		- error = inode_permission2(mnt, dir, MAY_WRITE \| MAY_EXEC);
	2794	+ error = inode_permission(dir, MAY_WRITE \| MAY_EXEC);
2909	2795	if (error)
2910	2796	return error;
2911	2797	if (IS_APPEND(dir))
..	..	@@ -2937,7 +2823,7 @@
2937	2823	* 4. We should have write and exec permissions on dir
2938	2824	* 5. We can't do it if dir is immutable (done in permission())
2939	2825	*/
2940		-static inline int may_create(struct vfsmount mnt, struct inode dir, struct dentry *child)
	2826	+static inline int may_create(struct inode dir, struct dentry child)
2941	2827	{
2942	2828	struct user_namespace *s_user_ns;
2943	2829	audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE);
..	..	@@ -2949,7 +2835,7 @@
2949	2835	if (!kuid_has_mapping(s_user_ns, current_fsuid()) \|\|
2950	2836	!kgid_has_mapping(s_user_ns, current_fsgid()))
2951	2837	return -EOVERFLOW;
2952		- return inode_permission2(mnt, dir, MAY_WRITE \| MAY_EXEC);
	2838	+ return inode_permission(dir, MAY_WRITE \| MAY_EXEC);
2953	2839	}
2954	2840
2955	2841	/*
..	..	@@ -2980,8 +2866,8 @@
2980	2866	return p;
2981	2867	}
2982	2868
2983		- inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
2984		- inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
	2869	+ lock_two_inodes(p1->d_inode, p2->d_inode,
	2870	+ I_MUTEX_PARENT, I_MUTEX_PARENT2);
2985	2871	return NULL;
2986	2872	}
2987	2873	EXPORT_SYMBOL(lock_rename);
..	..	@@ -2996,17 +2882,74 @@
2996	2882	}
2997	2883	EXPORT_SYMBOL(unlock_rename);
2998	2884
2999		-int vfs_create2(struct vfsmount mnt, struct inode dir, struct dentry *dentry,
3000		- umode_t mode, bool want_excl)
	2885	+/**
	2886	+ * mode_strip_umask - handle vfs umask stripping
	2887	+ * @dir: parent directory of the new inode
	2888	+ * @mode: mode of the new inode to be created in @dir
	2889	+ *
	2890	+ * Umask stripping depends on whether or not the filesystem supports POSIX
	2891	+ * ACLs. If the filesystem doesn't support it umask stripping is done directly
	2892	+ * in here. If the filesystem does support POSIX ACLs umask stripping is
	2893	+ * deferred until the filesystem calls posix_acl_create().
	2894	+ *
	2895	+ * Returns: mode
	2896	+ */
	2897	+static inline umode_t mode_strip_umask(const struct inode *dir, umode_t mode)
3001	2898	{
3002		- int error = may_create(mnt, dir, dentry);
	2899	+ if (!IS_POSIXACL(dir))
	2900	+ mode &= ~current_umask();
	2901	+ return mode;
	2902	+}
	2903	+
	2904	+/**
	2905	+ * vfs_prepare_mode - prepare the mode to be used for a new inode
	2906	+ * @dir: parent directory of the new inode
	2907	+ * @mode: mode of the new inode
	2908	+ * @mask_perms: allowed permission by the vfs
	2909	+ * @type: type of file to be created
	2910	+ *
	2911	+ * This helper consolidates and enforces vfs restrictions on the @mode of a new
	2912	+ * object to be created.
	2913	+ *
	2914	+ * Umask stripping depends on whether the filesystem supports POSIX ACLs (see
	2915	+ * the kernel documentation for mode_strip_umask()). Moving umask stripping
	2916	+ * after setgid stripping allows the same ordering for both non-POSIX ACL and
	2917	+ * POSIX ACL supporting filesystems.
	2918	+ *
	2919	+ * Note that it's currently valid for @type to be 0 if a directory is created.
	2920	+ * Filesystems raise that flag individually and we need to check whether each
	2921	+ * filesystem can deal with receiving S_IFDIR from the vfs before we enforce a
	2922	+ * non-zero type.
	2923	+ *
	2924	+ * Returns: mode to be passed to the filesystem
	2925	+ */
	2926	+static inline umode_t vfs_prepare_mode(const struct inode *dir, umode_t mode,
	2927	+ umode_t mask_perms, umode_t type)
	2928	+{
	2929	+ mode = mode_strip_sgid(dir, mode);
	2930	+ mode = mode_strip_umask(dir, mode);
	2931	+
	2932	+ /*
	2933	+ * Apply the vfs mandated allowed permission mask and set the type of
	2934	+ * file to be created before we call into the filesystem.
	2935	+ */
	2936	+ mode &= (mask_perms & ~S_IFMT);
	2937	+ mode \|= (type & S_IFMT);
	2938	+
	2939	+ return mode;
	2940	+}
	2941	+
	2942	+int vfs_create(struct inode dir, struct dentry dentry, umode_t mode,
	2943	+ bool want_excl)
	2944	+{
	2945	+ int error = may_create(dir, dentry);
3003	2946	if (error)
3004	2947	return error;
3005	2948
3006	2949	if (!dir->i_op->create)
3007	2950	return -EACCES; /* shouldn't it be ENOSYS? */
3008		- mode &= S_IALLUGO;
3009		- mode \|= S_IFREG;
	2951	+
	2952	+ mode = vfs_prepare_mode(dir, mode, S_IALLUGO, S_IFREG);
3010	2953	error = security_inode_create(dir, dentry, mode);
3011	2954	if (error)
3012	2955	return error;
..	..	@@ -3015,21 +2958,14 @@
3015	2958	fsnotify_create(dir, dentry);
3016	2959	return error;
3017	2960	}
3018		-EXPORT_SYMBOL(vfs_create2);
	2961	+EXPORT_SYMBOL_NS(vfs_create, ANDROID_GKI_VFS_EXPORT_ONLY);
3019	2962
3020		-int vfs_create(struct inode dir, struct dentry dentry, umode_t mode,
3021		- bool want_excl)
3022		-{
3023		- return vfs_create2(NULL, dir, dentry, mode, want_excl);
3024		-}
3025		-EXPORT_SYMBOL(vfs_create);
3026		-
3027		-int vfs_mkobj2(struct vfsmount mnt, struct dentry dentry, umode_t mode,
	2963	+int vfs_mkobj(struct dentry *dentry, umode_t mode,
3028	2964	int (f)(struct dentry , umode_t, void *),
3029	2965	void *arg)
3030	2966	{
3031	2967	struct inode *dir = dentry->d_parent->d_inode;
3032		- int error = may_create(mnt, dir, dentry);
	2968	+ int error = may_create(dir, dentry);
3033	2969	if (error)
3034	2970	return error;
3035	2971
..	..	@@ -3043,15 +2979,6 @@
3043	2979	fsnotify_create(dir, dentry);
3044	2980	return error;
3045	2981	}
3046		-EXPORT_SYMBOL(vfs_mkobj2);
3047		-
3048		-
3049		-int vfs_mkobj(struct dentry *dentry, umode_t mode,
3050		- int (f)(struct dentry , umode_t, void *),
3051		- void *arg)
3052		-{
3053		- return vfs_mkobj2(NULL, dentry, mode, f, arg);
3054		-}
3055	2982	EXPORT_SYMBOL(vfs_mkobj);
3056	2983
3057	2984	bool may_open_dev(const struct path *path)
..	..	@@ -3063,7 +2990,6 @@
3063	2990	static int may_open(const struct path *path, int acc_mode, int flag)
3064	2991	{
3065	2992	struct dentry *dentry = path->dentry;
3066		- struct vfsmount *mnt = path->mnt;
3067	2993	struct inode *inode = dentry->d_inode;
3068	2994	int error;
3069	2995
..	..	@@ -3076,19 +3002,27 @@
3076	3002	case S_IFDIR:
3077	3003	if (acc_mode & MAY_WRITE)
3078	3004	return -EISDIR;
	3005	+ if (acc_mode & MAY_EXEC)
	3006	+ return -EACCES;
3079	3007	break;
3080	3008	case S_IFBLK:
3081	3009	case S_IFCHR:
3082	3010	if (!may_open_dev(path))
3083	3011	return -EACCES;
3084		- /FALLTHRU/
	3012	+ fallthrough;
3085	3013	case S_IFIFO:
3086	3014	case S_IFSOCK:
	3015	+ if (acc_mode & MAY_EXEC)
	3016	+ return -EACCES;
3087	3017	flag &= ~O_TRUNC;
	3018	+ break;
	3019	+ case S_IFREG:
	3020	+ if ((acc_mode & MAY_EXEC) && path_noexec(path))
	3021	+ return -EACCES;
3088	3022	break;
3089	3023	}
3090	3024
3091		- error = inode_permission2(mnt, inode, MAY_OPEN \| acc_mode);
	3025	+ error = inode_permission(inode, MAY_OPEN \| acc_mode);
3092	3026	if (error)
3093	3027	return error;
3094	3028
..	..	@@ -3123,7 +3057,7 @@
3123	3057	if (!error)
3124	3058	error = security_path_truncate(path);
3125	3059	if (!error) {
3126		- error = do_truncate2(path->mnt, path->dentry, 0,
	3060	+ error = do_truncate(path->dentry, 0,
3127	3061	ATTR_MTIME\|ATTR_CTIME\|ATTR_OPEN,
3128	3062	filp);
3129	3063	}
..	..	@@ -3150,7 +3084,7 @@
3150	3084	!kgid_has_mapping(s_user_ns, current_fsgid()))
3151	3085	return -EOVERFLOW;
3152	3086
3153		- error = inode_permission2(dir->mnt, dir->dentry->d_inode, MAY_WRITE \| MAY_EXEC);
	3087	+ error = inode_permission(dir->dentry->d_inode, MAY_WRITE \| MAY_EXEC);
3154	3088	if (error)
3155	3089	return error;
3156	3090
..	..	@@ -3170,17 +3104,13 @@
3170	3104	*
3171	3105	* Returns an error code otherwise.
3172	3106	*/
3173		-static int atomic_open(struct nameidata nd, struct dentry dentry,
3174		- struct path path, struct file file,
3175		- const struct open_flags *op,
3176		- int open_flag, umode_t mode)
	3107	+static struct dentry atomic_open(struct nameidata nd, struct dentry *dentry,
	3108	+ struct file *file,
	3109	+ int open_flag, umode_t mode)
3177	3110	{
3178	3111	struct dentry const DENTRY_NOT_SET = (void ) -1UL;
3179	3112	struct inode *dir = nd->path.dentry->d_inode;
3180	3113	int error;
3181		-
3182		- if (!(~open_flag & (O_EXCL \| O_CREAT))) /* both O_EXCL and O_CREAT */
3183		- open_flag &= ~O_TRUNC;
3184	3114
3185	3115	if (nd->flags & LOOKUP_DIRECTORY)
3186	3116	open_flag \|= O_DIRECTORY;
..	..	@@ -3192,19 +3122,10 @@
3192	3122	d_lookup_done(dentry);
3193	3123	if (!error) {
3194	3124	if (file->f_mode & FMODE_OPENED) {
3195		- /*
3196		- * We didn't have the inode before the open, so check open
3197		- * permission here.
3198		- */
3199		- int acc_mode = op->acc_mode;
3200		- if (file->f_mode & FMODE_CREATED) {
3201		- WARN_ON(!(open_flag & O_CREAT));
3202		- fsnotify_create(dir, dentry);
3203		- acc_mode = 0;
	3125	+ if (unlikely(dentry != file->f_path.dentry)) {
	3126	+ dput(dentry);
	3127	+ dentry = dget(file->f_path.dentry);
3204	3128	}
3205		- error = may_open(&file->f_path, acc_mode, open_flag);
3206		- if (WARN_ON(error > 0))
3207		- error = -EINVAL;
3208	3129	} else if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) {
3209	3130	error = -EIO;
3210	3131	} else {
..	..	@@ -3212,19 +3133,15 @@
3212	3133	dput(dentry);
3213	3134	dentry = file->f_path.dentry;
3214	3135	}
3215		- if (file->f_mode & FMODE_CREATED)
3216		- fsnotify_create(dir, dentry);
3217		- if (unlikely(d_is_negative(dentry))) {
	3136	+ if (unlikely(d_is_negative(dentry)))
3218	3137	error = -ENOENT;
3219		- } else {
3220		- path->dentry = dentry;
3221		- path->mnt = nd->path.mnt;
3222		- return 0;
3223		- }
3224	3138	}
3225	3139	}
3226		- dput(dentry);
3227		- return error;
	3140	+ if (error) {
	3141	+ dput(dentry);
	3142	+ dentry = ERR_PTR(error);
	3143	+ }
	3144	+ return dentry;
3228	3145	}
3229	3146
3230	3147	/*
..	..	@@ -3242,10 +3159,9 @@
3242	3159	*
3243	3160	* An error code is returned on failure.
3244	3161	*/
3245		-static int lookup_open(struct nameidata nd, struct path path,
3246		- struct file *file,
3247		- const struct open_flags *op,
3248		- bool got_write)
	3162	+static struct dentry lookup_open(struct nameidata nd, struct file *file,
	3163	+ const struct open_flags *op,
	3164	+ bool got_write)
3249	3165	{
3250	3166	struct dentry *dir = nd->path.dentry;
3251	3167	struct inode *dir_inode = dir->d_inode;
..	..	@@ -3253,10 +3169,10 @@
3253	3169	struct dentry *dentry;
3254	3170	int error, create_error = 0;
3255	3171	umode_t mode = op->mode;
3256		- DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
	3172	+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
3257	3173
3258	3174	if (unlikely(IS_DEADDIR(dir_inode)))
3259		- return -ENOENT;
	3175	+ return ERR_PTR(-ENOENT);
3260	3176
3261	3177	file->f_mode &= ~FMODE_CREATED;
3262	3178	dentry = d_lookup(dir, &nd->last);
..	..	@@ -3264,7 +3180,7 @@
3264	3180	if (!dentry) {
3265	3181	dentry = d_alloc_parallel(dir, &nd->last, &wq);
3266	3182	if (IS_ERR(dentry))
3267		- return PTR_ERR(dentry);
	3183	+ return dentry;
3268	3184	}
3269	3185	if (d_in_lookup(dentry))
3270	3186	break;
..	..	@@ -3280,7 +3196,7 @@
3280	3196	}
3281	3197	if (dentry->d_inode) {
3282	3198	/* Cached positive dentry: will open in f_op->open */
3283		- goto out_no_open;
	3199	+ return dentry;
3284	3200	}
3285	3201
3286	3202	/*
..	..	@@ -3292,41 +3208,26 @@
3292	3208	* Another problem is returing the "right" error value (e.g. for an
3293	3209	* O_EXCL open we want to return EEXIST not EROFS).
3294	3210	*/
	3211	+ if (unlikely(!got_write))
	3212	+ open_flag &= ~O_TRUNC;
3295	3213	if (open_flag & O_CREAT) {
3296		- if (!IS_POSIXACL(dir->d_inode))
3297		- mode &= ~current_umask();
3298		- if (unlikely(!got_write)) {
3299		- create_error = -EROFS;
3300		- open_flag &= ~O_CREAT;
3301		- if (open_flag & (O_EXCL \| O_TRUNC))
3302		- goto no_open;
3303		- /* No side effects, safe to clear O_CREAT */
3304		- } else {
	3214	+ if (open_flag & O_EXCL)
	3215	+ open_flag &= ~O_TRUNC;
	3216	+ mode = vfs_prepare_mode(dir->d_inode, mode, mode, mode);
	3217	+ if (likely(got_write))
3305	3218	create_error = may_o_create(&nd->path, dentry, mode);
3306		- if (create_error) {
3307		- open_flag &= ~O_CREAT;
3308		- if (open_flag & O_EXCL)
3309		- goto no_open;
3310		- }
3311		- }
3312		- } else if ((open_flag & (O_TRUNC\|O_WRONLY\|O_RDWR)) &&
3313		- unlikely(!got_write)) {
3314		- /*
3315		- * No O_CREATE -> atomicity not a requirement -> fall
3316		- * back to lookup + open
3317		- */
3318		- goto no_open;
	3219	+ else
	3220	+ create_error = -EROFS;
3319	3221	}
3320		-
	3222	+ if (create_error)
	3223	+ open_flag &= ~O_CREAT;
3321	3224	if (dir_inode->i_op->atomic_open) {
3322		- error = atomic_open(nd, dentry, path, file, op, open_flag,
3323		- mode);
3324		- if (unlikely(error == -ENOENT) && create_error)
3325		- error = create_error;
3326		- return error;
	3225	+ dentry = atomic_open(nd, dentry, file, open_flag, mode);
	3226	+ if (unlikely(create_error) && dentry == ERR_PTR(-ENOENT))
	3227	+ dentry = ERR_PTR(create_error);
	3228	+ return dentry;
3327	3229	}
3328	3230
3329		-no_open:
3330	3231	if (d_in_lookup(dentry)) {
3331	3232	struct dentry *res = dir_inode->i_op->lookup(dir_inode, dentry,
3332	3233	nd->flags);
..	..	@@ -3353,84 +3254,62 @@
3353	3254	open_flag & O_EXCL);
3354	3255	if (error)
3355	3256	goto out_dput;
3356		- fsnotify_create(dir_inode, dentry);
3357	3257	}
3358	3258	if (unlikely(create_error) && !dentry->d_inode) {
3359	3259	error = create_error;
3360	3260	goto out_dput;
3361	3261	}
3362		-out_no_open:
3363		- path->dentry = dentry;
3364		- path->mnt = nd->path.mnt;
3365		- return 0;
	3262	+ return dentry;
3366	3263
3367	3264	out_dput:
3368	3265	dput(dentry);
3369		- return error;
	3266	+ return ERR_PTR(error);
3370	3267	}
3371	3268
3372		-/*
3373		- * Handle the last step of open()
3374		- */
3375		-static int do_last(struct nameidata *nd,
	3269	+static const char open_last_lookups(struct nameidata nd,
3376	3270	struct file file, const struct open_flags op)
3377	3271	{
3378	3272	struct dentry *dir = nd->path.dentry;
3379		- kuid_t dir_uid = nd->inode->i_uid;
3380		- umode_t dir_mode = nd->inode->i_mode;
3381	3273	int open_flag = op->open_flag;
3382		- bool will_truncate = (open_flag & O_TRUNC) != 0;
3383	3274	bool got_write = false;
3384		- int acc_mode = op->acc_mode;
3385	3275	unsigned seq;
3386	3276	struct inode *inode;
3387		- struct path path;
3388		- int error;
	3277	+ struct dentry *dentry;
	3278	+ const char *res;
3389	3279
3390		- nd->flags &= ~LOOKUP_PARENT;
3391	3280	nd->flags \|= op->intent;
3392	3281
3393	3282	if (nd->last_type != LAST_NORM) {
3394		- error = handle_dots(nd, nd->last_type);
3395		- if (unlikely(error))
3396		- return error;
3397		- goto finish_open;
	3283	+ if (nd->depth)
	3284	+ put_link(nd);
	3285	+ return handle_dots(nd, nd->last_type);
3398	3286	}
3399	3287
3400	3288	if (!(open_flag & O_CREAT)) {
3401	3289	if (nd->last.name[nd->last.len])
3402	3290	nd->flags \|= LOOKUP_FOLLOW \| LOOKUP_DIRECTORY;
3403	3291	/* we _can_ be in RCU mode here */
3404		- error = lookup_fast(nd, &path, &inode, &seq);
3405		- if (likely(error > 0))
	3292	+ dentry = lookup_fast(nd, &inode, &seq);
	3293	+ if (IS_ERR(dentry))
	3294	+ return ERR_CAST(dentry);
	3295	+ if (likely(dentry))
3406	3296	goto finish_lookup;
3407	3297
3408		- if (error < 0)
3409		- return error;
3410		-
3411		- BUG_ON(nd->inode != dir->d_inode);
3412	3298	BUG_ON(nd->flags & LOOKUP_RCU);
3413	3299	} else {
3414	3300	/* create side of things */
3415		- /*
3416		- * This will only deal with leaving RCU mode - LOOKUP_JUMPED
3417		- * has been cleared when we got to the last component we are
3418		- * about to look up
3419		- */
3420		- error = complete_walk(nd);
3421		- if (error)
3422		- return error;
3423		-
3424		- audit_inode(nd->name, dir, LOOKUP_PARENT);
	3301	+ if (nd->flags & LOOKUP_RCU) {
	3302	+ if (!try_to_unlazy(nd))
	3303	+ return ERR_PTR(-ECHILD);
	3304	+ }
	3305	+ audit_inode(nd->name, dir, AUDIT_INODE_PARENT);
3425	3306	/* trailing slashes? */
3426	3307	if (unlikely(nd->last.name[nd->last.len]))
3427		- return -EISDIR;
	3308	+ return ERR_PTR(-EISDIR);
3428	3309	}
3429	3310
3430	3311	if (open_flag & (O_CREAT \| O_TRUNC \| O_WRONLY \| O_RDWR)) {
3431		- error = mnt_want_write(nd->path.mnt);
3432		- if (!error)
3433		- got_write = true;
	3312	+ got_write = !mnt_want_write(nd->path.mnt);
3434	3313	/*
3435	3314	* do _not_ fail yet - we might not need that or fail with
3436	3315	* a different error; let lookup_open() decide; we'll be
..	..	@@ -3441,113 +3320,90 @@
3441	3320	inode_lock(dir->d_inode);
3442	3321	else
3443	3322	inode_lock_shared(dir->d_inode);
3444		- error = lookup_open(nd, &path, file, op, got_write);
	3323	+ dentry = lookup_open(nd, file, op, got_write);
	3324	+ if (!IS_ERR(dentry) && (file->f_mode & FMODE_CREATED))
	3325	+ fsnotify_create(dir->d_inode, dentry);
3445	3326	if (open_flag & O_CREAT)
3446	3327	inode_unlock(dir->d_inode);
3447	3328	else
3448	3329	inode_unlock_shared(dir->d_inode);
3449	3330
3450		- if (error)
3451		- goto out;
	3331	+ if (got_write)
	3332	+ mnt_drop_write(nd->path.mnt);
3452	3333
3453		- if (file->f_mode & FMODE_OPENED) {
3454		- if ((file->f_mode & FMODE_CREATED) \|\|
3455		- !S_ISREG(file_inode(file)->i_mode))
3456		- will_truncate = false;
	3334	+ if (IS_ERR(dentry))
	3335	+ return ERR_CAST(dentry);
3457	3336
3458		- audit_inode(nd->name, file->f_path.dentry, 0);
3459		- goto opened;
	3337	+ if (file->f_mode & (FMODE_OPENED \| FMODE_CREATED)) {
	3338	+ dput(nd->path.dentry);
	3339	+ nd->path.dentry = dentry;
	3340	+ return NULL;
3460	3341	}
3461	3342
	3343	+finish_lookup:
	3344	+ if (nd->depth)
	3345	+ put_link(nd);
	3346	+ res = step_into(nd, WALK_TRAILING, dentry, inode, seq);
	3347	+ if (unlikely(res))
	3348	+ nd->flags &= ~(LOOKUP_OPEN\|LOOKUP_CREATE\|LOOKUP_EXCL);
	3349	+ return res;
	3350	+}
	3351	+
	3352	+/*
	3353	+ * Handle the last step of open()
	3354	+ */
	3355	+static int do_open(struct nameidata *nd,
	3356	+ struct file file, const struct open_flags op)
	3357	+{
	3358	+ int open_flag = op->open_flag;
	3359	+ bool do_truncate;
	3360	+ int acc_mode;
	3361	+ int error;
	3362	+
	3363	+ if (!(file->f_mode & (FMODE_OPENED \| FMODE_CREATED))) {
	3364	+ error = complete_walk(nd);
	3365	+ if (error)
	3366	+ return error;
	3367	+ }
	3368	+ if (!(file->f_mode & FMODE_CREATED))
	3369	+ audit_inode(nd->name, nd->path.dentry, 0);
	3370	+ if (open_flag & O_CREAT) {
	3371	+ if ((open_flag & O_EXCL) && !(file->f_mode & FMODE_CREATED))
	3372	+ return -EEXIST;
	3373	+ if (d_is_dir(nd->path.dentry))
	3374	+ return -EISDIR;
	3375	+ error = may_create_in_sticky(nd->dir_mode, nd->dir_uid,
	3376	+ d_backing_inode(nd->path.dentry));
	3377	+ if (unlikely(error))
	3378	+ return error;
	3379	+ }
	3380	+ if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry))
	3381	+ return -ENOTDIR;
	3382	+
	3383	+ do_truncate = false;
	3384	+ acc_mode = op->acc_mode;
3462	3385	if (file->f_mode & FMODE_CREATED) {
3463	3386	/* Don't check for write permission, don't truncate */
3464	3387	open_flag &= ~O_TRUNC;
3465		- will_truncate = false;
3466	3388	acc_mode = 0;
3467		- path_to_nameidata(&path, nd);
3468		- goto finish_open_created;
3469		- }
3470		-
3471		- /*
3472		- * If atomic_open() acquired write access it is dropped now due to
3473		- * possible mount and symlink following (this might be optimized away if
3474		- * necessary...)
3475		- */
3476		- if (got_write) {
3477		- mnt_drop_write(nd->path.mnt);
3478		- got_write = false;
3479		- }
3480		-
3481		- error = follow_managed(&path, nd);
3482		- if (unlikely(error < 0))
3483		- return error;
3484		-
3485		- if (unlikely(d_is_negative(path.dentry))) {
3486		- path_to_nameidata(&path, nd);
3487		- return -ENOENT;
3488		- }
3489		-
3490		- /*
3491		- * create/update audit record if it already exists.
3492		- */
3493		- audit_inode(nd->name, path.dentry, 0);
3494		-
3495		- if (unlikely((open_flag & (O_EXCL \| O_CREAT)) == (O_EXCL \| O_CREAT))) {
3496		- path_to_nameidata(&path, nd);
3497		- return -EEXIST;
3498		- }
3499		-
3500		- seq = 0; /* out of RCU mode, so the value doesn't matter */
3501		- inode = d_backing_inode(path.dentry);
3502		-finish_lookup:
3503		- error = step_into(nd, &path, 0, inode, seq);
3504		- if (unlikely(error))
3505		- return error;
3506		-finish_open:
3507		- /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */
3508		- error = complete_walk(nd);
3509		- if (error)
3510		- return error;
3511		- audit_inode(nd->name, nd->path.dentry, 0);
3512		- if (open_flag & O_CREAT) {
3513		- error = -EISDIR;
3514		- if (d_is_dir(nd->path.dentry))
3515		- goto out;
3516		- error = may_create_in_sticky(dir_mode, dir_uid,
3517		- d_backing_inode(nd->path.dentry));
3518		- if (unlikely(error))
3519		- goto out;
3520		- }
3521		- error = -ENOTDIR;
3522		- if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry))
3523		- goto out;
3524		- if (!d_is_reg(nd->path.dentry))
3525		- will_truncate = false;
3526		-
3527		- if (will_truncate) {
	3389	+ } else if (d_is_reg(nd->path.dentry) && open_flag & O_TRUNC) {
3528	3390	error = mnt_want_write(nd->path.mnt);
3529	3391	if (error)
3530		- goto out;
3531		- got_write = true;
	3392	+ return error;
	3393	+ do_truncate = true;
3532	3394	}
3533		-finish_open_created:
3534	3395	error = may_open(&nd->path, acc_mode, open_flag);
3535		- if (error)
3536		- goto out;
3537		- BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */
3538		- error = vfs_open(&nd->path, file);
3539		- if (error)
3540		- goto out;
3541		-opened:
3542		- error = ima_file_check(file, op->acc_mode);
3543		- if (!error && will_truncate)
	3396	+ if (!error && !(file->f_mode & FMODE_OPENED))
	3397	+ error = vfs_open(&nd->path, file);
	3398	+ if (!error)
	3399	+ error = ima_file_check(file, op->acc_mode);
	3400	+ if (!error && do_truncate)
3544	3401	error = handle_truncate(file);
3545		-out:
3546	3402	if (unlikely(error > 0)) {
3547	3403	WARN_ON(1);
3548	3404	error = -EINVAL;
3549	3405	}
3550		- if (got_write)
	3406	+ if (do_truncate)
3551	3407	mnt_drop_write(nd->path.mnt);
3552	3408	return error;
3553	3409	}
..	..	@@ -3560,8 +3416,7 @@
3560	3416	int error;
3561	3417
3562	3418	/* we want directory to be writable */
3563		- error = inode_permission2(ERR_PTR(-EOPNOTSUPP), dir,
3564		- MAY_WRITE \| MAY_EXEC);
	3419	+ error = inode_permission(dir, MAY_WRITE \| MAY_EXEC);
3565	3420	if (error)
3566	3421	goto out_err;
3567	3422	error = -EOPNOTSUPP;
..	..	@@ -3571,6 +3426,7 @@
3571	3426	child = d_alloc(dentry, &slash_name);
3572	3427	if (unlikely(!child))
3573	3428	goto out_err;
	3429	+ mode = vfs_prepare_mode(dir, mode, mode, mode);
3574	3430	error = dir->i_op->tmpfile(dir, child, mode);
3575	3431	if (error)
3576	3432	goto out_err;
..	..	@@ -3583,6 +3439,7 @@
3583	3439	inode->i_state \|= I_LINKABLE;
3584	3440	spin_unlock(&inode->i_lock);
3585	3441	}
	3442	+ ima_post_create_tmpfile(inode);
3586	3443	return child;
3587	3444
3588	3445	out_err:
..	..	@@ -3652,10 +3509,10 @@
3652	3509	} else {
3653	3510	const char *s = path_init(nd, flags);
3654	3511	while (!(error = link_path_walk(s, nd)) &&
3655		- (error = do_last(nd, file, op)) > 0) {
3656		- nd->flags &= ~(LOOKUP_OPEN\|LOOKUP_CREATE\|LOOKUP_EXCL);
3657		- s = trailing_symlink(nd);
3658		- }
	3512	+ (s = open_last_lookups(nd, file, op)) != NULL)
	3513	+ ;
	3514	+ if (!error)
	3515	+ error = do_open(nd, file, op);
3659	3516	terminate_walk(nd);
3660	3517	}
3661	3518	if (likely(!error)) {
..	..	@@ -3815,19 +3672,22 @@
3815	3672	}
3816	3673	EXPORT_SYMBOL(user_path_create);
3817	3674
3818		-int vfs_mknod2(struct vfsmount mnt, struct inode dir, struct dentry *dentry, umode_t mode, dev_t dev)
	3675	+int vfs_mknod(struct inode dir, struct dentry dentry, umode_t mode, dev_t dev)
3819	3676	{
3820		- int error = may_create(mnt, dir, dentry);
	3677	+ bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV;
	3678	+ int error = may_create(dir, dentry);
3821	3679
3822	3680	if (error)
3823	3681	return error;
3824	3682
3825		- if ((S_ISCHR(mode) \|\| S_ISBLK(mode)) && !capable(CAP_MKNOD))
	3683	+ if ((S_ISCHR(mode) \|\| S_ISBLK(mode)) && !is_whiteout &&
	3684	+ !capable(CAP_MKNOD))
3826	3685	return -EPERM;
3827	3686
3828	3687	if (!dir->i_op->mknod)
3829	3688	return -EPERM;
3830	3689
	3690	+ mode = vfs_prepare_mode(dir, mode, mode, mode);
3831	3691	error = devcgroup_inode_mknod(mode, dev);
3832	3692	if (error)
3833	3693	return error;
..	..	@@ -3840,12 +3700,6 @@
3840	3700	if (!error)
3841	3701	fsnotify_create(dir, dentry);
3842	3702	return error;
3843		-}
3844		-EXPORT_SYMBOL(vfs_mknod2);
3845		-
3846		-int vfs_mknod(struct inode dir, struct dentry dentry, umode_t mode, dev_t dev)
3847		-{
3848		- return vfs_mknod2(NULL, dir, dentry, mode, dev);
3849	3703	}
3850	3704	EXPORT_SYMBOL(vfs_mknod);
3851	3705
..	..	@@ -3866,7 +3720,7 @@
3866	3720	}
3867	3721	}
3868	3722
3869		-long do_mknodat(int dfd, const char __user *filename, umode_t mode,
	3723	+static long do_mknodat(int dfd, const char __user *filename, umode_t mode,
3870	3724	unsigned int dev)
3871	3725	{
3872	3726	struct dentry *dentry;
..	..	@@ -3882,19 +3736,18 @@
3882	3736	if (IS_ERR(dentry))
3883	3737	return PTR_ERR(dentry);
3884	3738
3885		- if (!IS_POSIXACL(path.dentry->d_inode))
3886		- mode &= ~current_umask();
3887		- error = security_path_mknod(&path, dentry, mode, dev);
	3739	+ error = security_path_mknod(&path, dentry,
	3740	+ mode_strip_umask(path.dentry->d_inode, mode), dev);
3888	3741	if (error)
3889	3742	goto out;
3890	3743	switch (mode & S_IFMT) {
3891	3744	case 0: case S_IFREG:
3892		- error = vfs_create2(path.mnt, path.dentry->d_inode,dentry,mode,true);
	3745	+ error = vfs_create(path.dentry->d_inode,dentry,mode,true);
3893	3746	if (!error)
3894	3747	ima_post_path_mknod(dentry);
3895	3748	break;
3896	3749	case S_IFCHR: case S_IFBLK:
3897		- error = vfs_mknod2(path.mnt, path.dentry->d_inode,dentry,mode,
	3750	+ error = vfs_mknod(path.dentry->d_inode,dentry,mode,
3898	3751	new_decode_dev(dev));
3899	3752	break;
3900	3753	case S_IFIFO: case S_IFSOCK:
..	..	@@ -3921,9 +3774,9 @@
3921	3774	return do_mknodat(AT_FDCWD, filename, mode, dev);
3922	3775	}
3923	3776
3924		-int vfs_mkdir2(struct vfsmount mnt, struct inode dir, struct dentry *dentry, umode_t mode)
	3777	+int vfs_mkdir(struct inode dir, struct dentry dentry, umode_t mode)
3925	3778	{
3926		- int error = may_create(mnt, dir, dentry);
	3779	+ int error = may_create(dir, dentry);
3927	3780	unsigned max_links = dir->i_sb->s_max_links;
3928	3781
3929	3782	if (error)
..	..	@@ -3932,7 +3785,7 @@
3932	3785	if (!dir->i_op->mkdir)
3933	3786	return -EPERM;
3934	3787
3935		- mode &= (S_IRWXUGO\|S_ISVTX);
	3788	+ mode = vfs_prepare_mode(dir, mode, S_IRWXUGO \| S_ISVTX, 0);
3936	3789	error = security_inode_mkdir(dir, dentry, mode);
3937	3790	if (error)
3938	3791	return error;
..	..	@@ -3945,15 +3798,9 @@
3945	3798	fsnotify_mkdir(dir, dentry);
3946	3799	return error;
3947	3800	}
3948		-EXPORT_SYMBOL(vfs_mkdir2);
	3801	+EXPORT_SYMBOL_NS(vfs_mkdir, ANDROID_GKI_VFS_EXPORT_ONLY);
3949	3802
3950		-int vfs_mkdir(struct inode dir, struct dentry dentry, umode_t mode)
3951		-{
3952		- return vfs_mkdir2(NULL, dir, dentry, mode);
3953		-}
3954		-EXPORT_SYMBOL(vfs_mkdir);
3955		-
3956		-long do_mkdirat(int dfd, const char __user *pathname, umode_t mode)
	3803	+static long do_mkdirat(int dfd, const char __user *pathname, umode_t mode)
3957	3804	{
3958	3805	struct dentry *dentry;
3959	3806	struct path path;
..	..	@@ -3965,11 +3812,10 @@
3965	3812	if (IS_ERR(dentry))
3966	3813	return PTR_ERR(dentry);
3967	3814
3968		- if (!IS_POSIXACL(path.dentry->d_inode))
3969		- mode &= ~current_umask();
3970		- error = security_path_mkdir(&path, dentry, mode);
	3815	+ error = security_path_mkdir(&path, dentry,
	3816	+ mode_strip_umask(path.dentry->d_inode, mode));
3971	3817	if (!error)
3972		- error = vfs_mkdir2(path.mnt, path.dentry->d_inode, dentry, mode);
	3818	+ error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
3973	3819	done_path_create(&path, dentry);
3974	3820	if (retry_estale(error, lookup_flags)) {
3975	3821	lookup_flags \|= LOOKUP_REVAL;
..	..	@@ -3988,9 +3834,9 @@
3988	3834	return do_mkdirat(AT_FDCWD, pathname, mode);
3989	3835	}
3990	3836
3991		-int vfs_rmdir2(struct vfsmount mnt, struct inode dir, struct dentry *dentry)
	3837	+int vfs_rmdir(struct inode dir, struct dentry dentry)
3992	3838	{
3993		- int error = may_delete(mnt, dir, dentry, 1);
	3839	+ int error = may_delete(dir, dentry, 1);
3994	3840
3995	3841	if (error)
3996	3842	return error;
..	..	@@ -4022,28 +3868,21 @@
4022	3868	inode_unlock(dentry->d_inode);
4023	3869	dput(dentry);
4024	3870	if (!error)
4025		- d_delete(dentry);
	3871	+ d_delete_notify(dir, dentry);
4026	3872	return error;
4027	3873	}
4028		-EXPORT_SYMBOL(vfs_rmdir2);
	3874	+EXPORT_SYMBOL_NS(vfs_rmdir, ANDROID_GKI_VFS_EXPORT_ONLY);
4029	3875
4030		-int vfs_rmdir(struct inode dir, struct dentry dentry)
4031		-{
4032		- return vfs_rmdir2(NULL, dir, dentry);
4033		-}
4034		-EXPORT_SYMBOL(vfs_rmdir);
4035		-
4036		-long do_rmdir(int dfd, const char __user *pathname)
	3876	+long do_rmdir(int dfd, struct filename *name)
4037	3877	{
4038	3878	int error = 0;
4039		- struct filename *name;
4040	3879	struct dentry *dentry;
4041	3880	struct path path;
4042	3881	struct qstr last;
4043	3882	int type;
4044	3883	unsigned int lookup_flags = 0;
4045	3884	retry:
4046		- name = filename_parentat(dfd, getname(pathname), lookup_flags,
	3885	+ name = filename_parentat(dfd, name, lookup_flags,
4047	3886	&path, &last, &type);
4048	3887	if (IS_ERR(name))
4049	3888	return PTR_ERR(name);
..	..	@@ -4076,7 +3915,7 @@
4076	3915	error = security_path_rmdir(&path, dentry);
4077	3916	if (error)
4078	3917	goto exit3;
4079		- error = vfs_rmdir2(path.mnt, path.dentry->d_inode, dentry);
	3918	+ error = vfs_rmdir(path.dentry->d_inode, dentry);
4080	3919	exit3:
4081	3920	dput(dentry);
4082	3921	exit2:
..	..	@@ -4084,17 +3923,17 @@
4084	3923	mnt_drop_write(path.mnt);
4085	3924	exit1:
4086	3925	path_put(&path);
4087		- putname(name);
4088	3926	if (retry_estale(error, lookup_flags)) {
4089	3927	lookup_flags \|= LOOKUP_REVAL;
4090	3928	goto retry;
4091	3929	}
	3930	+ putname(name);
4092	3931	return error;
4093	3932	}
4094	3933
4095	3934	SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
4096	3935	{
4097		- return do_rmdir(AT_FDCWD, pathname);
	3936	+ return do_rmdir(AT_FDCWD, getname(pathname));
4098	3937	}
4099	3938
4100	3939	/**
..	..	@@ -4115,10 +3954,10 @@
4115	3954	* be appropriate for callers that expect the underlying filesystem not
4116	3955	* to be NFS exported.
4117	3956	*/
4118		-int vfs_unlink2(struct vfsmount mnt, struct inode dir, struct dentry dentry, struct inode *delegated_inode)
	3957	+int vfs_unlink(struct inode dir, struct dentry dentry, struct inode **delegated_inode)
4119	3958	{
4120	3959	struct inode *target = dentry->d_inode;
4121		- int error = may_delete(mnt, dir, dentry, 0);
	3960	+ int error = may_delete(dir, dentry, 0);
4122	3961
4123	3962	if (error)
4124	3963	return error;
..	..	@@ -4146,20 +3985,16 @@
4146	3985	inode_unlock(target);
4147	3986
4148	3987	/* We don't d_delete() NFS sillyrenamed files--they still exist. */
4149		- if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
	3988	+ if (!error && dentry->d_flags & DCACHE_NFSFS_RENAMED) {
	3989	+ fsnotify_unlink(dir, dentry);
	3990	+ } else if (!error) {
4150	3991	fsnotify_link_count(target);
4151		- d_delete(dentry);
	3992	+ d_delete_notify(dir, dentry);
4152	3993	}
4153	3994
4154	3995	return error;
4155	3996	}
4156		-EXPORT_SYMBOL(vfs_unlink2);
4157		-
4158		-int vfs_unlink(struct inode dir, struct dentry dentry, struct inode **delegated_inode)
4159		-{
4160		- return vfs_unlink2(NULL, dir, dentry, delegated_inode);
4161		-}
4162		-EXPORT_SYMBOL(vfs_unlink);
	3997	+EXPORT_SYMBOL_NS(vfs_unlink, ANDROID_GKI_VFS_EXPORT_ONLY);
4163	3998
4164	3999	/*
4165	4000	* Make sure that the actual truncation of the file will occur outside its
..	..	@@ -4204,7 +4039,7 @@
4204	4039	error = security_path_unlink(&path, dentry);
4205	4040	if (error)
4206	4041	goto exit2;
4207		- error = vfs_unlink2(path.mnt, path.dentry->d_inode, dentry, &delegated_inode);
	4042	+ error = vfs_unlink(path.dentry->d_inode, dentry, &delegated_inode);
4208	4043	exit2:
4209	4044	dput(dentry);
4210	4045	}
..	..	@@ -4244,8 +4079,7 @@
4244	4079	return -EINVAL;
4245	4080
4246	4081	if (flag & AT_REMOVEDIR)
4247		- return do_rmdir(dfd, pathname);
4248		-
	4082	+ return do_rmdir(dfd, getname(pathname));
4249	4083	return do_unlinkat(dfd, getname(pathname));
4250	4084	}
4251	4085
..	..	@@ -4254,9 +4088,9 @@
4254	4088	return do_unlinkat(AT_FDCWD, getname(pathname));
4255	4089	}
4256	4090
4257		-int vfs_symlink2(struct vfsmount mnt, struct inode dir, struct dentry dentry, const char oldname)
	4091	+int vfs_symlink(struct inode dir, struct dentry dentry, const char *oldname)
4258	4092	{
4259		- int error = may_create(mnt, dir, dentry);
	4093	+ int error = may_create(dir, dentry);
4260	4094
4261	4095	if (error)
4262	4096	return error;
..	..	@@ -4273,15 +4107,9 @@
4273	4107	fsnotify_create(dir, dentry);
4274	4108	return error;
4275	4109	}
4276		-EXPORT_SYMBOL(vfs_symlink2);
4277		-
4278		-int vfs_symlink(struct inode dir, struct dentry dentry, const char *oldname)
4279		-{
4280		- return vfs_symlink2(NULL, dir, dentry, oldname);
4281		-}
4282	4110	EXPORT_SYMBOL(vfs_symlink);
4283	4111
4284		-long do_symlinkat(const char __user *oldname, int newdfd,
	4112	+static long do_symlinkat(const char __user *oldname, int newdfd,
4285	4113	const char __user *newname)
4286	4114	{
4287	4115	int error;
..	..	@@ -4301,7 +4129,7 @@
4301	4129
4302	4130	error = security_path_symlink(&path, dentry, from->name);
4303	4131	if (!error)
4304		- error = vfs_symlink2(path.mnt, path.dentry->d_inode, dentry, from->name);
	4132	+ error = vfs_symlink(path.dentry->d_inode, dentry, from->name);
4305	4133	done_path_create(&path, dentry);
4306	4134	if (retry_estale(error, lookup_flags)) {
4307	4135	lookup_flags \|= LOOKUP_REVAL;
..	..	@@ -4342,7 +4170,7 @@
4342	4170	* be appropriate for callers that expect the underlying filesystem not
4343	4171	* to be NFS exported.
4344	4172	*/
4345		-int vfs_link2(struct vfsmount mnt, struct dentry old_dentry, struct inode dir, struct dentry new_dentry, struct inode **delegated_inode)
	4173	+int vfs_link(struct dentry old_dentry, struct inode dir, struct dentry new_dentry, struct inode *delegated_inode)
4346	4174	{
4347	4175	struct inode *inode = old_dentry->d_inode;
4348	4176	unsigned max_links = dir->i_sb->s_max_links;
..	..	@@ -4351,7 +4179,7 @@
4351	4179	if (!inode)
4352	4180	return -ENOENT;
4353	4181
4354		- error = may_create(mnt, dir, new_dentry);
	4182	+ error = may_create(dir, new_dentry);
4355	4183	if (error)
4356	4184	return error;
4357	4185
..	..	@@ -4401,13 +4229,7 @@
4401	4229	fsnotify_link(dir, inode, new_dentry);
4402	4230	return error;
4403	4231	}
4404		-EXPORT_SYMBOL(vfs_link2);
4405		-
4406		-int vfs_link(struct dentry old_dentry, struct inode dir, struct dentry new_dentry, struct inode *delegated_inode)
4407		-{
4408		- return vfs_link2(NULL, old_dentry, dir, new_dentry, delegated_inode);
4409		-}
4410		-EXPORT_SYMBOL(vfs_link);
	4232	+EXPORT_SYMBOL_NS(vfs_link, ANDROID_GKI_VFS_EXPORT_ONLY);
4411	4233
4412	4234	/*
4413	4235	* Hardlinks are often used in delicate situations. We avoid
..	..	@@ -4418,7 +4240,7 @@
4418	4240	* with linux 2.0, and to avoid hard-linking to directories
4419	4241	* and other special files. --ADM
4420	4242	*/
4421		-int do_linkat(int olddfd, const char __user *oldname, int newdfd,
	4243	+static int do_linkat(int olddfd, const char __user *oldname, int newdfd,
4422	4244	const char __user *newname, int flags)
4423	4245	{
4424	4246	struct dentry *new_dentry;
..	..	@@ -4462,7 +4284,7 @@
4462	4284	error = security_path_link(old_path.dentry, &new_path, new_dentry);
4463	4285	if (error)
4464	4286	goto out_dput;
4465		- error = vfs_link2(old_path.mnt, old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode);
	4287	+ error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode);
4466	4288	out_dput:
4467	4289	done_path_create(&new_path, new_dentry);
4468	4290	if (delegated_inode) {
..	..	@@ -4526,7 +4348,7 @@
4526	4348	* sb->s_vfs_rename_mutex. We might be more accurate, but that's another
4527	4349	* story.
4528	4350	* c) we have to lock _four_ objects - parents and victim (if it exists),
4529		- * and source (if it is not a directory).
	4351	+ * and source.
4530	4352	* And that - after we got ->i_mutex on parents (until then we don't know
4531	4353	* whether the target exists). Solution: try to be smart with locking
4532	4354	* order for inodes. We rely on the fact that tree topology may change
..	..	@@ -4544,8 +4366,7 @@
4544	4366	* ->i_mutex on parents, which works but leads to some truly excessive
4545	4367	* locking].
4546	4368	*/
4547		-int vfs_rename2(struct vfsmount *mnt,
4548		- struct inode old_dir, struct dentry old_dentry,
	4369	+int vfs_rename(struct inode old_dir, struct dentry old_dentry,
4549	4370	struct inode new_dir, struct dentry new_dentry,
4550	4371	struct inode **delegated_inode, unsigned int flags)
4551	4372	{
..	..	@@ -4560,19 +4381,19 @@
4560	4381	if (source == target)
4561	4382	return 0;
4562	4383
4563		- error = may_delete(mnt, old_dir, old_dentry, is_dir);
	4384	+ error = may_delete(old_dir, old_dentry, is_dir);
4564	4385	if (error)
4565	4386	return error;
4566	4387
4567	4388	if (!target) {
4568		- error = may_create(mnt, new_dir, new_dentry);
	4389	+ error = may_create(new_dir, new_dentry);
4569	4390	} else {
4570	4391	new_is_dir = d_is_dir(new_dentry);
4571	4392
4572	4393	if (!(flags & RENAME_EXCHANGE))
4573		- error = may_delete(mnt, new_dir, new_dentry, is_dir);
	4394	+ error = may_delete(new_dir, new_dentry, is_dir);
4574	4395	else
4575		- error = may_delete(mnt, new_dir, new_dentry, new_is_dir);
	4396	+ error = may_delete(new_dir, new_dentry, new_is_dir);
4576	4397	}
4577	4398	if (error)
4578	4399	return error;
..	..	@@ -4586,12 +4407,12 @@
4586	4407	*/
4587	4408	if (new_dir != old_dir) {
4588	4409	if (is_dir) {
4589		- error = inode_permission2(mnt, source, MAY_WRITE);
	4410	+ error = inode_permission(source, MAY_WRITE);
4590	4411	if (error)
4591	4412	return error;
4592	4413	}
4593	4414	if ((flags & RENAME_EXCHANGE) && new_is_dir) {
4594		- error = inode_permission2(mnt, target, MAY_WRITE);
	4415	+ error = inode_permission(target, MAY_WRITE);
4595	4416	if (error)
4596	4417	return error;
4597	4418	}
..	..	@@ -4604,10 +4425,16 @@
4604	4425
4605	4426	take_dentry_name_snapshot(&old_name, old_dentry);
4606	4427	dget(new_dentry);
4607		- if (!is_dir \|\| (flags & RENAME_EXCHANGE))
4608		- lock_two_nondirectories(source, target);
4609		- else if (target)
4610		- inode_lock(target);
	4428	+ /*
	4429	+ * Lock all moved children. Moved directories may need to change parent
	4430	+ * pointer so they need the lock to prevent against concurrent
	4431	+ * directory changes moving parent pointer. For regular files we've
	4432	+ * historically always done this. The lockdep locking subclasses are
	4433	+ * somewhat arbitrary but RENAME_EXCHANGE in particular can swap
	4434	+ * regular files and directories so it's difficult to tell which
	4435	+ * subclasses to use.
	4436	+ */
	4437	+ lock_two_inodes(source, target, I_MUTEX_NORMAL, I_MUTEX_NONDIR2);
4611	4438
4612	4439	error = -EBUSY;
4613	4440	if (is_local_mountpoint(old_dentry) \|\| is_local_mountpoint(new_dentry))
..	..	@@ -4651,16 +4478,15 @@
4651	4478	d_exchange(old_dentry, new_dentry);
4652	4479	}
4653	4480	out:
4654		- if (!is_dir \|\| (flags & RENAME_EXCHANGE))
4655		- unlock_two_nondirectories(source, target);
4656		- else if (target)
	4481	+ inode_unlock(source);
	4482	+ if (target)
4657	4483	inode_unlock(target);
4658	4484	dput(new_dentry);
4659	4485	if (!error) {
4660		- fsnotify_move(old_dir, new_dir, old_name.name, is_dir,
	4486	+ fsnotify_move(old_dir, new_dir, &old_name.name, is_dir,
4661	4487	!(flags & RENAME_EXCHANGE) ? target : NULL, old_dentry);
4662	4488	if (flags & RENAME_EXCHANGE) {
4663		- fsnotify_move(new_dir, old_dir, old_dentry->d_name.name,
	4489	+ fsnotify_move(new_dir, old_dir, &old_dentry->d_name,
4664	4490	new_is_dir, NULL, new_dentry);
4665	4491	}
4666	4492	}
..	..	@@ -4668,18 +4494,10 @@
4668	4494
4669	4495	return error;
4670	4496	}
4671		-EXPORT_SYMBOL(vfs_rename2);
	4497	+EXPORT_SYMBOL_NS(vfs_rename, ANDROID_GKI_VFS_EXPORT_ONLY);
4672	4498
4673		-int vfs_rename(struct inode old_dir, struct dentry old_dentry,
4674		- struct inode new_dir, struct dentry new_dentry,
4675		- struct inode **delegated_inode, unsigned int flags)
4676		-{
4677		- return vfs_rename2(NULL, old_dir, old_dentry, new_dir, new_dentry, delegated_inode, flags);
4678		-}
4679		-EXPORT_SYMBOL(vfs_rename);
4680		-
4681		-static int do_renameat2(int olddfd, const char __user *oldname, int newdfd,
4682		- const char __user *newname, unsigned int flags)
	4499	+int do_renameat2(int olddfd, struct filename *from, int newdfd,
	4500	+ struct filename *to, unsigned int flags)
4683	4501	{
4684	4502	struct dentry old_dentry, new_dentry;
4685	4503	struct dentry *trap;
..	..	@@ -4687,35 +4505,30 @@
4687	4505	struct qstr old_last, new_last;
4688	4506	int old_type, new_type;
4689	4507	struct inode *delegated_inode = NULL;
4690		- struct filename *from;
4691		- struct filename *to;
4692	4508	unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET;
4693	4509	bool should_retry = false;
4694		- int error;
	4510	+ int error = -EINVAL;
4695	4511
4696	4512	if (flags & ~(RENAME_NOREPLACE \| RENAME_EXCHANGE \| RENAME_WHITEOUT))
4697		- return -EINVAL;
	4513	+ goto put_both;
4698	4514
4699	4515	if ((flags & (RENAME_NOREPLACE \| RENAME_WHITEOUT)) &&
4700	4516	(flags & RENAME_EXCHANGE))
4701		- return -EINVAL;
4702		-
4703		- if ((flags & RENAME_WHITEOUT) && !capable(CAP_MKNOD))
4704		- return -EPERM;
	4517	+ goto put_both;
4705	4518
4706	4519	if (flags & RENAME_EXCHANGE)
4707	4520	target_flags = 0;
4708	4521
4709	4522	retry:
4710		- from = filename_parentat(olddfd, getname(oldname), lookup_flags,
4711		- &old_path, &old_last, &old_type);
	4523	+ from = filename_parentat(olddfd, from, lookup_flags, &old_path,
	4524	+ &old_last, &old_type);
4712	4525	if (IS_ERR(from)) {
4713	4526	error = PTR_ERR(from);
4714		- goto exit;
	4527	+ goto put_new;
4715	4528	}
4716	4529
4717		- to = filename_parentat(newdfd, getname(newname), lookup_flags,
4718		- &new_path, &new_last, &new_type);
	4530	+ to = filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last,
	4531	+ &new_type);
4719	4532	if (IS_ERR(to)) {
4720	4533	error = PTR_ERR(to);
4721	4534	goto exit1;
..	..	@@ -4789,7 +4602,7 @@
4789	4602	&new_path, new_dentry, flags);
4790	4603	if (error)
4791	4604	goto exit5;
4792		- error = vfs_rename2(old_path.mnt, old_path.dentry->d_inode, old_dentry,
	4605	+ error = vfs_rename(old_path.dentry->d_inode, old_dentry,
4793	4606	new_path.dentry->d_inode, new_dentry,
4794	4607	&delegated_inode, flags);
4795	4608	exit5:
..	..	@@ -4808,49 +4621,41 @@
4808	4621	if (retry_estale(error, lookup_flags))
4809	4622	should_retry = true;
4810	4623	path_put(&new_path);
4811		- putname(to);
4812	4624	exit1:
4813	4625	path_put(&old_path);
4814		- putname(from);
4815	4626	if (should_retry) {
4816	4627	should_retry = false;
4817	4628	lookup_flags \|= LOOKUP_REVAL;
4818	4629	goto retry;
4819	4630	}
4820		-exit:
	4631	+put_both:
	4632	+ if (!IS_ERR(from))
	4633	+ putname(from);
	4634	+put_new:
	4635	+ if (!IS_ERR(to))
	4636	+ putname(to);
4821	4637	return error;
4822	4638	}
4823	4639
4824	4640	SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
4825	4641	int, newdfd, const char __user *, newname, unsigned int, flags)
4826	4642	{
4827		- return do_renameat2(olddfd, oldname, newdfd, newname, flags);
	4643	+ return do_renameat2(olddfd, getname(oldname), newdfd, getname(newname),
	4644	+ flags);
4828	4645	}
4829	4646
4830	4647	SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
4831	4648	int, newdfd, const char __user *, newname)
4832	4649	{
4833		- return do_renameat2(olddfd, oldname, newdfd, newname, 0);
	4650	+ return do_renameat2(olddfd, getname(oldname), newdfd, getname(newname),
	4651	+ 0);
4834	4652	}
4835	4653
4836	4654	SYSCALL_DEFINE2(rename, const char __user , oldname, const char __user , newname)
4837	4655	{
4838		- return do_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
	4656	+ return do_renameat2(AT_FDCWD, getname(oldname), AT_FDCWD,
	4657	+ getname(newname), 0);
4839	4658	}
4840		-
4841		-int vfs_whiteout(struct inode dir, struct dentry dentry)
4842		-{
4843		- int error = may_create(NULL, dir, dentry);
4844		- if (error)
4845		- return error;
4846		-
4847		- if (!dir->i_op->mknod)
4848		- return -EPERM;
4849		-
4850		- return dir->i_op->mknod(dir, dentry,
4851		- S_IFCHR \| WHITEOUT_MODE, WHITEOUT_DEV);
4852		-}
4853		-EXPORT_SYMBOL(vfs_whiteout);
4854	4659
4855	4660	int readlink_copy(char __user buffer, int buflen, const char link)
4856	4661	{
..	..	@@ -4987,7 +4792,7 @@
4987	4792	{
4988	4793	struct address_space *mapping = inode->i_mapping;
4989	4794	struct page *page;
4990		- void *fsdata;
	4795	+ void *fsdata = NULL;
4991	4796	int err;
4992	4797	unsigned int flags = 0;
4993	4798	if (nofs)