hc
2024-09-20 cf4ce59b3b70238352c7f1729f0f7223214828ad
kernel/fs/btrfs/tree-log.c
....@@ -8,6 +8,7 @@
88 #include <linux/blkdev.h>
99 #include <linux/list_sort.h>
1010 #include <linux/iversion.h>
11
+#include "misc.h"
1112 #include "ctree.h"
1213 #include "tree-log.h"
1314 #include "disk-io.h"
....@@ -17,6 +18,8 @@
1718 #include "compression.h"
1819 #include "qgroup.h"
1920 #include "inode-map.h"
21
+#include "block-group.h"
22
+#include "space-info.h"
2023
2124 /* magic values for the inode_only field in btrfs_log_inode:
2225 *
....@@ -24,9 +27,12 @@
2427 * LOG_INODE_EXISTS means to log just enough to recreate the inode
2528 * during log replay
2629 */
27
-#define LOG_INODE_ALL 0
28
-#define LOG_INODE_EXISTS 1
29
-#define LOG_OTHER_INODE 2
30
+enum {
31
+ LOG_INODE_ALL,
32
+ LOG_INODE_EXISTS,
33
+ LOG_OTHER_INODE,
34
+ LOG_OTHER_INODE_ALL,
35
+};
3036
3137 /*
3238 * directory trouble cases
....@@ -80,16 +86,16 @@
8086 * The last stage is to deal with directories and links and extents
8187 * and all the other fun semantics
8288 */
83
-#define LOG_WALK_PIN_ONLY 0
84
-#define LOG_WALK_REPLAY_INODES 1
85
-#define LOG_WALK_REPLAY_DIR_INDEX 2
86
-#define LOG_WALK_REPLAY_ALL 3
89
+enum {
90
+ LOG_WALK_PIN_ONLY,
91
+ LOG_WALK_REPLAY_INODES,
92
+ LOG_WALK_REPLAY_DIR_INDEX,
93
+ LOG_WALK_REPLAY_ALL,
94
+};
8795
8896 static int btrfs_log_inode(struct btrfs_trans_handle *trans,
8997 struct btrfs_root *root, struct btrfs_inode *inode,
9098 int inode_only,
91
- const loff_t start,
92
- const loff_t end,
9399 struct btrfs_log_ctx *ctx);
94100 static int link_to_fixup_dir(struct btrfs_trans_handle *trans,
95101 struct btrfs_root *root,
....@@ -138,7 +144,7 @@
138144 mutex_lock(&root->log_mutex);
139145
140146 if (root->log_root) {
141
- if (btrfs_need_log_full_commit(fs_info, trans)) {
147
+ if (btrfs_need_log_full_commit(trans)) {
142148 ret = -EAGAIN;
143149 goto out;
144150 }
....@@ -161,13 +167,14 @@
161167 if (ret)
162168 goto out;
163169
170
+ set_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state);
164171 clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state);
165172 root->log_start_pid = current->pid;
166173 }
167174
168175 atomic_inc(&root->log_batch);
169176 atomic_inc(&root->log_writers);
170
- if (ctx) {
177
+ if (ctx && !ctx->logging_new_name) {
171178 int index = root->log_transid % 2;
172179 list_add_tail(&ctx->list, &root->log_ctxs[index]);
173180 ctx->log_transid = root->log_transid;
....@@ -187,9 +194,8 @@
187194 {
188195 int ret = -ENOENT;
189196
190
- smp_mb();
191
- if (!root->log_root)
192
- return -ENOENT;
197
+ if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state))
198
+ return ret;
193199
194200 mutex_lock(&root->log_mutex);
195201 if (root->log_root) {
....@@ -205,14 +211,9 @@
205211 * until you call btrfs_end_log_trans() or it makes any future
206212 * log transactions wait until you call btrfs_end_log_trans()
207213 */
208
-int btrfs_pin_log_trans(struct btrfs_root *root)
214
+void btrfs_pin_log_trans(struct btrfs_root *root)
209215 {
210
- int ret = -ENOENT;
211
-
212
- mutex_lock(&root->log_mutex);
213216 atomic_inc(&root->log_writers);
214
- mutex_unlock(&root->log_mutex);
215
- return ret;
216217 }
217218
218219 /*
....@@ -227,6 +228,17 @@
227228 }
228229 }
229230
231
+static int btrfs_write_tree_block(struct extent_buffer *buf)
232
+{
233
+ return filemap_fdatawrite_range(buf->pages[0]->mapping, buf->start,
234
+ buf->start + buf->len - 1);
235
+}
236
+
237
+static void btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
238
+{
239
+ filemap_fdatawait_range(buf->pages[0]->mapping,
240
+ buf->start, buf->start + buf->len - 1);
241
+}
230242
231243 /*
232244 * the walk control struct is used to pass state down the chain when
....@@ -301,12 +313,12 @@
301313 }
302314
303315 if (wc->pin)
304
- ret = btrfs_pin_extent_for_log_replay(fs_info, eb->start,
316
+ ret = btrfs_pin_extent_for_log_replay(wc->trans, eb->start,
305317 eb->len);
306318
307319 if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) {
308320 if (wc->pin && btrfs_header_level(eb) == 0)
309
- ret = btrfs_exclude_logged_extents(fs_info, eb);
321
+ ret = btrfs_exclude_logged_extents(eb);
310322 if (wc->write)
311323 btrfs_write_tree_block(eb);
312324 if (wc->wait)
....@@ -335,7 +347,6 @@
335347 struct extent_buffer *eb, int slot,
336348 struct btrfs_key *key)
337349 {
338
- struct btrfs_fs_info *fs_info = root->fs_info;
339350 int ret;
340351 u32 item_size;
341352 u64 saved_i_size = 0;
....@@ -456,10 +467,9 @@
456467 found_size = btrfs_item_size_nr(path->nodes[0],
457468 path->slots[0]);
458469 if (found_size > item_size)
459
- btrfs_truncate_item(fs_info, path, item_size, 1);
470
+ btrfs_truncate_item(path, item_size, 1);
460471 else if (found_size < item_size)
461
- btrfs_extend_item(fs_info, path,
462
- item_size - found_size);
472
+ btrfs_extend_item(path, item_size - found_size);
463473 } else if (ret) {
464474 return ret;
465475 }
....@@ -495,13 +505,8 @@
495505 */
496506 if (S_ISREG(btrfs_inode_mode(eb, src_item)) &&
497507 S_ISREG(btrfs_inode_mode(dst_eb, dst_item)) &&
498
- ino_size != 0) {
499
- struct btrfs_map_token token;
500
-
501
- btrfs_init_map_token(&token);
502
- btrfs_set_token_inode_size(dst_eb, dst_item,
503
- ino_size, &token);
504
- }
508
+ ino_size != 0)
509
+ btrfs_set_inode_size(dst_eb, dst_item, ino_size);
505510 goto no_copy;
506511 }
507512
....@@ -545,13 +550,9 @@
545550 static noinline struct inode *read_one_inode(struct btrfs_root *root,
546551 u64 objectid)
547552 {
548
- struct btrfs_key key;
549553 struct inode *inode;
550554
551
- key.objectid = objectid;
552
- key.type = BTRFS_INODE_ITEM_KEY;
553
- key.offset = 0;
554
- inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
555
+ inode = btrfs_iget(root->fs_info->sb, objectid, root);
555556 if (IS_ERR(inode))
556557 inode = NULL;
557558 return inode;
....@@ -696,20 +697,27 @@
696697 goto out;
697698
698699 if (ins.objectid > 0) {
700
+ struct btrfs_ref ref = { 0 };
699701 u64 csum_start;
700702 u64 csum_end;
701703 LIST_HEAD(ordered_sums);
704
+
702705 /*
703706 * is this extent already allocated in the extent
704707 * allocation tree? If so, just add a reference
705708 */
706709 ret = btrfs_lookup_data_extent(fs_info, ins.objectid,
707710 ins.offset);
708
- if (ret == 0) {
709
- ret = btrfs_inc_extent_ref(trans, root,
710
- ins.objectid, ins.offset,
711
- 0, root->root_key.objectid,
711
+ if (ret < 0) {
712
+ goto out;
713
+ } else if (ret == 0) {
714
+ btrfs_init_generic_ref(&ref,
715
+ BTRFS_ADD_DELAYED_REF,
716
+ ins.objectid, ins.offset, 0);
717
+ btrfs_init_data_ref(&ref,
718
+ root->root_key.objectid,
712719 key->objectid, offset);
720
+ ret = btrfs_inc_extent_ref(trans, &ref);
713721 if (ret)
714722 goto out;
715723 } else {
....@@ -816,6 +824,11 @@
816824 if (ret)
817825 goto out;
818826 }
827
+
828
+ ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), start,
829
+ extent_end - start);
830
+ if (ret)
831
+ goto out;
819832
820833 inode_add_bytes(inode, nbytes);
821834 update_inode:
....@@ -941,54 +954,32 @@
941954 const char *name, int namelen)
942955 {
943956 struct btrfs_path *path;
944
- struct btrfs_inode_ref *ref;
945
- unsigned long ptr;
946
- unsigned long ptr_end;
947
- unsigned long name_ptr;
948
- int found_name_len;
949
- int item_size;
950957 int ret;
951
- int match = 0;
952958
953959 path = btrfs_alloc_path();
954960 if (!path)
955961 return -ENOMEM;
956962
957963 ret = btrfs_search_slot(NULL, log, key, path, 0, 0);
958
- if (ret != 0)
964
+ if (ret < 0) {
959965 goto out;
966
+ } else if (ret == 1) {
967
+ ret = 0;
968
+ goto out;
969
+ }
960970
961
- ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
962
-
963
- if (key->type == BTRFS_INODE_EXTREF_KEY) {
964
- if (btrfs_find_name_in_ext_backref(path->nodes[0],
971
+ if (key->type == BTRFS_INODE_EXTREF_KEY)
972
+ ret = !!btrfs_find_name_in_ext_backref(path->nodes[0],
973
+ path->slots[0],
974
+ ref_objectid,
975
+ name, namelen);
976
+ else
977
+ ret = !!btrfs_find_name_in_backref(path->nodes[0],
965978 path->slots[0],
966
- ref_objectid,
967
- name, namelen, NULL))
968
- match = 1;
969
-
970
- goto out;
971
- }
972
-
973
- item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
974
- ptr_end = ptr + item_size;
975
- while (ptr < ptr_end) {
976
- ref = (struct btrfs_inode_ref *)ptr;
977
- found_name_len = btrfs_inode_ref_name_len(path->nodes[0], ref);
978
- if (found_name_len == namelen) {
979
- name_ptr = (unsigned long)(ref + 1);
980
- ret = memcmp_extent_buffer(path->nodes[0], name,
981
- name_ptr, namelen);
982
- if (ret == 0) {
983
- match = 1;
984
- goto out;
985
- }
986
- }
987
- ptr = (unsigned long)(ref + 1) + found_name_len;
988
- }
979
+ name, namelen);
989980 out:
990981 btrfs_free_path(path);
991
- return match;
982
+ return ret;
992983 }
993984
994985 static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
....@@ -1046,10 +1037,13 @@
10461037 (unsigned long)(victim_ref + 1),
10471038 victim_name_len);
10481039
1049
- if (!backref_in_log(log_root, &search_key,
1050
- parent_objectid,
1051
- victim_name,
1052
- victim_name_len)) {
1040
+ ret = backref_in_log(log_root, &search_key,
1041
+ parent_objectid, victim_name,
1042
+ victim_name_len);
1043
+ if (ret < 0) {
1044
+ kfree(victim_name);
1045
+ return ret;
1046
+ } else if (!ret) {
10531047 inc_nlink(&inode->vfs_inode);
10541048 btrfs_release_path(path);
10551049
....@@ -1081,7 +1075,9 @@
10811075 extref = btrfs_lookup_inode_extref(NULL, root, path, name, namelen,
10821076 inode_objectid, parent_objectid, 0,
10831077 0);
1084
- if (!IS_ERR_OR_NULL(extref)) {
1078
+ if (IS_ERR(extref)) {
1079
+ return PTR_ERR(extref);
1080
+ } else if (extref) {
10851081 u32 item_size;
10861082 u32 cur_offset = 0;
10871083 unsigned long base;
....@@ -1111,10 +1107,13 @@
11111107 search_key.offset = btrfs_extref_hash(parent_objectid,
11121108 victim_name,
11131109 victim_name_len);
1114
- ret = 0;
1115
- if (!backref_in_log(log_root, &search_key,
1116
- parent_objectid, victim_name,
1117
- victim_name_len)) {
1110
+ ret = backref_in_log(log_root, &search_key,
1111
+ parent_objectid, victim_name,
1112
+ victim_name_len);
1113
+ if (ret < 0) {
1114
+ kfree(victim_name);
1115
+ return ret;
1116
+ } else if (!ret) {
11181117 ret = -ENOENT;
11191118 victim_parent = read_one_inode(root,
11201119 parent_objectid);
....@@ -1159,7 +1158,7 @@
11591158 }
11601159 btrfs_release_path(path);
11611160
1162
- /* look for a conflicing name */
1161
+ /* look for a conflicting name */
11631162 di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir),
11641163 name, namelen, 0);
11651164 if (IS_ERR(di)) {
....@@ -1268,12 +1267,12 @@
12681267 goto out;
12691268
12701269 if (key->type == BTRFS_INODE_EXTREF_KEY)
1271
- ret = btrfs_find_name_in_ext_backref(log_eb, log_slot,
1272
- parent_id, name,
1273
- namelen, NULL);
1270
+ ret = !!btrfs_find_name_in_ext_backref(log_eb, log_slot,
1271
+ parent_id, name,
1272
+ namelen);
12741273 else
1275
- ret = btrfs_find_name_in_backref(log_eb, log_slot, name,
1276
- namelen, NULL);
1274
+ ret = !!btrfs_find_name_in_backref(log_eb, log_slot,
1275
+ name, namelen);
12771276
12781277 if (!ret) {
12791278 struct inode *dir;
....@@ -1289,6 +1288,15 @@
12891288 inode, name, namelen);
12901289 kfree(name);
12911290 iput(dir);
1291
+ /*
1292
+ * Whenever we need to check if a name exists or not, we
1293
+ * check the subvolume tree. So after an unlink we must
1294
+ * run delayed items, so that future checks for a name
1295
+ * during log replay see that the name does not exists
1296
+ * anymore.
1297
+ */
1298
+ if (!ret)
1299
+ ret = btrfs_run_delayed_items(trans);
12921300 if (ret)
12931301 goto out;
12941302 goto again;
....@@ -1335,15 +1343,75 @@
13351343 goto out;
13361344 }
13371345 if (key.type == BTRFS_INODE_EXTREF_KEY)
1338
- ret = btrfs_find_name_in_ext_backref(path->nodes[0],
1339
- path->slots[0], parent_id,
1340
- name, namelen, NULL);
1346
+ ret = !!btrfs_find_name_in_ext_backref(path->nodes[0],
1347
+ path->slots[0], parent_id, name, namelen);
13411348 else
1342
- ret = btrfs_find_name_in_backref(path->nodes[0], path->slots[0],
1343
- name, namelen, NULL);
1349
+ ret = !!btrfs_find_name_in_backref(path->nodes[0], path->slots[0],
1350
+ name, namelen);
13441351
13451352 out:
13461353 btrfs_free_path(path);
1354
+ return ret;
1355
+}
1356
+
1357
+static int add_link(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1358
+ struct inode *dir, struct inode *inode, const char *name,
1359
+ int namelen, u64 ref_index)
1360
+{
1361
+ struct btrfs_dir_item *dir_item;
1362
+ struct btrfs_key key;
1363
+ struct btrfs_path *path;
1364
+ struct inode *other_inode = NULL;
1365
+ int ret;
1366
+
1367
+ path = btrfs_alloc_path();
1368
+ if (!path)
1369
+ return -ENOMEM;
1370
+
1371
+ dir_item = btrfs_lookup_dir_item(NULL, root, path,
1372
+ btrfs_ino(BTRFS_I(dir)),
1373
+ name, namelen, 0);
1374
+ if (!dir_item) {
1375
+ btrfs_release_path(path);
1376
+ goto add_link;
1377
+ } else if (IS_ERR(dir_item)) {
1378
+ ret = PTR_ERR(dir_item);
1379
+ goto out;
1380
+ }
1381
+
1382
+ /*
1383
+ * Our inode's dentry collides with the dentry of another inode which is
1384
+ * in the log but not yet processed since it has a higher inode number.
1385
+ * So delete that other dentry.
1386
+ */
1387
+ btrfs_dir_item_key_to_cpu(path->nodes[0], dir_item, &key);
1388
+ btrfs_release_path(path);
1389
+ other_inode = read_one_inode(root, key.objectid);
1390
+ if (!other_inode) {
1391
+ ret = -ENOENT;
1392
+ goto out;
1393
+ }
1394
+ ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir), BTRFS_I(other_inode),
1395
+ name, namelen);
1396
+ if (ret)
1397
+ goto out;
1398
+ /*
1399
+ * If we dropped the link count to 0, bump it so that later the iput()
1400
+ * on the inode will not free it. We will fixup the link count later.
1401
+ */
1402
+ if (other_inode->i_nlink == 0)
1403
+ inc_nlink(other_inode);
1404
+
1405
+ ret = btrfs_run_delayed_items(trans);
1406
+ if (ret)
1407
+ goto out;
1408
+add_link:
1409
+ ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
1410
+ name, namelen, 0, ref_index);
1411
+out:
1412
+ iput(other_inode);
1413
+ btrfs_free_path(path);
1414
+
13471415 return ret;
13481416 }
13491417
....@@ -1480,14 +1548,22 @@
14801548 */
14811549 if (!ret && inode->i_nlink == 0)
14821550 inc_nlink(inode);
1551
+ /*
1552
+ * Whenever we need to check if a name exists or
1553
+ * not, we check the subvolume tree. So after an
1554
+ * unlink we must run delayed items, so that future
1555
+ * checks for a name during log replay see that the
1556
+ * name does not exists anymore.
1557
+ */
1558
+ if (!ret)
1559
+ ret = btrfs_run_delayed_items(trans);
14831560 }
14841561 if (ret < 0)
14851562 goto out;
14861563
14871564 /* insert our name */
1488
- ret = btrfs_add_link(trans, BTRFS_I(dir),
1489
- BTRFS_I(inode),
1490
- name, namelen, 0, ref_index);
1565
+ ret = add_link(trans, root, dir, inode, name, namelen,
1566
+ ref_index);
14911567 if (ret)
14921568 goto out;
14931569
....@@ -1829,30 +1905,6 @@
18291905 }
18301906
18311907 /*
1832
- * Return true if an inode reference exists in the log for the given name,
1833
- * inode and parent inode.
1834
- */
1835
-static bool name_in_log_ref(struct btrfs_root *log_root,
1836
- const char *name, const int name_len,
1837
- const u64 dirid, const u64 ino)
1838
-{
1839
- struct btrfs_key search_key;
1840
-
1841
- search_key.objectid = ino;
1842
- search_key.type = BTRFS_INODE_REF_KEY;
1843
- search_key.offset = dirid;
1844
- if (backref_in_log(log_root, &search_key, dirid, name, name_len))
1845
- return true;
1846
-
1847
- search_key.type = BTRFS_INODE_EXTREF_KEY;
1848
- search_key.offset = btrfs_extref_hash(dirid, name, name_len);
1849
- if (backref_in_log(log_root, &search_key, dirid, name, name_len))
1850
- return true;
1851
-
1852
- return false;
1853
-}
1854
-
1855
-/*
18561908 * take a single entry in a log directory item and replay it into
18571909 * the subvolume.
18581910 *
....@@ -1975,8 +2027,31 @@
19752027 return ret;
19762028
19772029 insert:
1978
- if (name_in_log_ref(root->log_root, name, name_len,
1979
- key->objectid, log_key.objectid)) {
2030
+ /*
2031
+ * Check if the inode reference exists in the log for the given name,
2032
+ * inode and parent inode
2033
+ */
2034
+ found_key.objectid = log_key.objectid;
2035
+ found_key.type = BTRFS_INODE_REF_KEY;
2036
+ found_key.offset = key->objectid;
2037
+ ret = backref_in_log(root->log_root, &found_key, 0, name, name_len);
2038
+ if (ret < 0) {
2039
+ goto out;
2040
+ } else if (ret) {
2041
+ /* The dentry will be added later. */
2042
+ ret = 0;
2043
+ update_size = false;
2044
+ goto out;
2045
+ }
2046
+
2047
+ found_key.objectid = log_key.objectid;
2048
+ found_key.type = BTRFS_INODE_EXTREF_KEY;
2049
+ found_key.offset = key->objectid;
2050
+ ret = backref_in_log(root->log_root, &found_key, key->objectid, name,
2051
+ name_len);
2052
+ if (ret < 0) {
2053
+ goto out;
2054
+ } else if (ret) {
19802055 /* The dentry will be added later. */
19812056 ret = 0;
19822057 update_size = false;
....@@ -2629,29 +2704,45 @@
26292704 return ret;
26302705 }
26312706
2707
+/*
2708
+ * Correctly adjust the reserved bytes occupied by a log tree extent buffer
2709
+ */
2710
+static void unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start)
2711
+{
2712
+ struct btrfs_block_group *cache;
2713
+
2714
+ cache = btrfs_lookup_block_group(fs_info, start);
2715
+ if (!cache) {
2716
+ btrfs_err(fs_info, "unable to find block group for %llu", start);
2717
+ return;
2718
+ }
2719
+
2720
+ spin_lock(&cache->space_info->lock);
2721
+ spin_lock(&cache->lock);
2722
+ cache->reserved -= fs_info->nodesize;
2723
+ cache->space_info->bytes_reserved -= fs_info->nodesize;
2724
+ spin_unlock(&cache->lock);
2725
+ spin_unlock(&cache->space_info->lock);
2726
+
2727
+ btrfs_put_block_group(cache);
2728
+}
2729
+
26322730 static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
26332731 struct btrfs_root *root,
26342732 struct btrfs_path *path, int *level,
26352733 struct walk_control *wc)
26362734 {
26372735 struct btrfs_fs_info *fs_info = root->fs_info;
2638
- u64 root_owner;
26392736 u64 bytenr;
26402737 u64 ptr_gen;
26412738 struct extent_buffer *next;
26422739 struct extent_buffer *cur;
2643
- struct extent_buffer *parent;
26442740 u32 blocksize;
26452741 int ret = 0;
2646
-
2647
- WARN_ON(*level < 0);
2648
- WARN_ON(*level >= BTRFS_MAX_LEVEL);
26492742
26502743 while (*level > 0) {
26512744 struct btrfs_key first_key;
26522745
2653
- WARN_ON(*level < 0);
2654
- WARN_ON(*level >= BTRFS_MAX_LEVEL);
26552746 cur = path->nodes[*level];
26562747
26572748 WARN_ON(btrfs_header_level(cur) != *level);
....@@ -2664,9 +2755,6 @@
26642755 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
26652756 btrfs_node_key_to_cpu(cur, &first_key, path->slots[*level]);
26662757 blocksize = fs_info->nodesize;
2667
-
2668
- parent = path->nodes[*level];
2669
- root_owner = btrfs_header_owner(parent);
26702758
26712759 next = btrfs_find_create_tree_block(fs_info, bytenr);
26722760 if (IS_ERR(next))
....@@ -2691,23 +2779,20 @@
26912779
26922780 if (trans) {
26932781 btrfs_tree_lock(next);
2694
- btrfs_set_lock_blocking(next);
2695
- clean_tree_block(fs_info, next);
2782
+ btrfs_set_lock_blocking_write(next);
2783
+ btrfs_clean_tree_block(next);
26962784 btrfs_wait_tree_block_writeback(next);
26972785 btrfs_tree_unlock(next);
2786
+ ret = btrfs_pin_reserved_extent(trans,
2787
+ bytenr, blocksize);
2788
+ if (ret) {
2789
+ free_extent_buffer(next);
2790
+ return ret;
2791
+ }
26982792 } else {
26992793 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
27002794 clear_extent_buffer_dirty(next);
2701
- }
2702
-
2703
- WARN_ON(root_owner !=
2704
- BTRFS_TREE_LOG_OBJECTID);
2705
- ret = btrfs_free_and_pin_reserved_extent(
2706
- fs_info, bytenr,
2707
- blocksize);
2708
- if (ret) {
2709
- free_extent_buffer(next);
2710
- return ret;
2795
+ unaccount_log_buffer(fs_info, bytenr);
27112796 }
27122797 }
27132798 free_extent_buffer(next);
....@@ -2719,7 +2804,6 @@
27192804 return ret;
27202805 }
27212806
2722
- WARN_ON(*level <= 0);
27232807 if (path->nodes[*level-1])
27242808 free_extent_buffer(path->nodes[*level-1]);
27252809 path->nodes[*level-1] = next;
....@@ -2727,9 +2811,6 @@
27272811 path->slots[*level] = 0;
27282812 cond_resched();
27292813 }
2730
- WARN_ON(*level < 0);
2731
- WARN_ON(*level >= BTRFS_MAX_LEVEL);
2732
-
27332814 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
27342815
27352816 cond_resched();
....@@ -2742,7 +2823,6 @@
27422823 struct walk_control *wc)
27432824 {
27442825 struct btrfs_fs_info *fs_info = root->fs_info;
2745
- u64 root_owner;
27462826 int i;
27472827 int slot;
27482828 int ret;
....@@ -2755,13 +2835,6 @@
27552835 WARN_ON(*level == 0);
27562836 return 0;
27572837 } else {
2758
- struct extent_buffer *parent;
2759
- if (path->nodes[*level] == root->node)
2760
- parent = path->nodes[*level];
2761
- else
2762
- parent = path->nodes[*level + 1];
2763
-
2764
- root_owner = btrfs_header_owner(parent);
27652838 ret = wc->process_func(root, path->nodes[*level], wc,
27662839 btrfs_header_generation(path->nodes[*level]),
27672840 *level);
....@@ -2775,22 +2848,22 @@
27752848
27762849 if (trans) {
27772850 btrfs_tree_lock(next);
2778
- btrfs_set_lock_blocking(next);
2779
- clean_tree_block(fs_info, next);
2851
+ btrfs_set_lock_blocking_write(next);
2852
+ btrfs_clean_tree_block(next);
27802853 btrfs_wait_tree_block_writeback(next);
27812854 btrfs_tree_unlock(next);
2855
+ ret = btrfs_pin_reserved_extent(trans,
2856
+ path->nodes[*level]->start,
2857
+ path->nodes[*level]->len);
2858
+ if (ret)
2859
+ return ret;
27822860 } else {
27832861 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
27842862 clear_extent_buffer_dirty(next);
2785
- }
27862863
2787
- WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
2788
- ret = btrfs_free_and_pin_reserved_extent(
2789
- fs_info,
2790
- path->nodes[*level]->start,
2791
- path->nodes[*level]->len);
2792
- if (ret)
2793
- return ret;
2864
+ unaccount_log_buffer(fs_info,
2865
+ path->nodes[*level]->start);
2866
+ }
27942867 }
27952868 free_extent_buffer(path->nodes[*level]);
27962869 path->nodes[*level] = NULL;
....@@ -2822,7 +2895,7 @@
28222895 level = btrfs_header_level(log->node);
28232896 orig_level = level;
28242897 path->nodes[level] = log->node;
2825
- extent_buffer_get(log->node);
2898
+ atomic_inc(&log->node->refs);
28262899 path->slots[level] = 0;
28272900
28282901 while (1) {
....@@ -2857,21 +2930,19 @@
28572930
28582931 if (trans) {
28592932 btrfs_tree_lock(next);
2860
- btrfs_set_lock_blocking(next);
2861
- clean_tree_block(fs_info, next);
2933
+ btrfs_set_lock_blocking_write(next);
2934
+ btrfs_clean_tree_block(next);
28622935 btrfs_wait_tree_block_writeback(next);
28632936 btrfs_tree_unlock(next);
2937
+ ret = btrfs_pin_reserved_extent(trans,
2938
+ next->start, next->len);
2939
+ if (ret)
2940
+ goto out;
28642941 } else {
28652942 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
28662943 clear_extent_buffer_dirty(next);
2944
+ unaccount_log_buffer(fs_info, next->start);
28672945 }
2868
-
2869
- WARN_ON(log->root_key.objectid !=
2870
- BTRFS_TREE_LOG_OBJECTID);
2871
- ret = btrfs_free_and_pin_reserved_extent(fs_info,
2872
- next->start, next->len);
2873
- if (ret)
2874
- goto out;
28752946 }
28762947 }
28772948
....@@ -3035,7 +3106,7 @@
30353106 }
30363107
30373108 /* bail out if we need to do a full commit */
3038
- if (btrfs_need_log_full_commit(fs_info, trans)) {
3109
+ if (btrfs_need_log_full_commit(trans)) {
30393110 ret = -EAGAIN;
30403111 mutex_unlock(&root->log_mutex);
30413112 goto out;
....@@ -3054,7 +3125,7 @@
30543125 if (ret) {
30553126 blk_finish_plug(&plug);
30563127 btrfs_abort_transaction(trans, ret);
3057
- btrfs_set_log_full_commit(fs_info, trans);
3128
+ btrfs_set_log_full_commit(trans);
30583129 mutex_unlock(&root->log_mutex);
30593130 goto out;
30603131 }
....@@ -3088,16 +3159,10 @@
30883159 btrfs_init_log_ctx(&root_log_ctx, NULL);
30893160
30903161 mutex_lock(&log_root_tree->log_mutex);
3091
- atomic_inc(&log_root_tree->log_batch);
3092
- atomic_inc(&log_root_tree->log_writers);
30933162
30943163 index2 = log_root_tree->log_transid % 2;
30953164 list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]);
30963165 root_log_ctx.log_transid = log_root_tree->log_transid;
3097
-
3098
- mutex_unlock(&log_root_tree->log_mutex);
3099
-
3100
- mutex_lock(&log_root_tree->log_mutex);
31013166
31023167 /*
31033168 * Now we are safe to update the log_root_tree because we're under the
....@@ -3105,18 +3170,12 @@
31053170 * open until we drop the log_mutex.
31063171 */
31073172 ret = update_log_root(trans, log, &new_root_item);
3108
-
3109
- if (atomic_dec_and_test(&log_root_tree->log_writers)) {
3110
- /* atomic_dec_and_test implies a barrier */
3111
- cond_wake_up_nomb(&log_root_tree->log_writer_wait);
3112
- }
3113
-
31143173 if (ret) {
31153174 if (!list_empty(&root_log_ctx.list))
31163175 list_del_init(&root_log_ctx.list);
31173176
31183177 blk_finish_plug(&plug);
3119
- btrfs_set_log_full_commit(fs_info, trans);
3178
+ btrfs_set_log_full_commit(trans);
31203179
31213180 if (ret != -ENOSPC) {
31223181 btrfs_abort_transaction(trans, ret);
....@@ -3156,13 +3215,11 @@
31563215 root_log_ctx.log_transid - 1);
31573216 }
31583217
3159
- wait_for_writer(log_root_tree);
3160
-
31613218 /*
31623219 * now that we've moved on to the tree of log tree roots,
31633220 * check the full commit flag again
31643221 */
3165
- if (btrfs_need_log_full_commit(fs_info, trans)) {
3222
+ if (btrfs_need_log_full_commit(trans)) {
31663223 blk_finish_plug(&plug);
31673224 btrfs_wait_tree_log_extents(log, mark);
31683225 mutex_unlock(&log_root_tree->log_mutex);
....@@ -3175,7 +3232,7 @@
31753232 EXTENT_DIRTY | EXTENT_NEW);
31763233 blk_finish_plug(&plug);
31773234 if (ret) {
3178
- btrfs_set_log_full_commit(fs_info, trans);
3235
+ btrfs_set_log_full_commit(trans);
31793236 btrfs_abort_transaction(trans, ret);
31803237 mutex_unlock(&log_root_tree->log_mutex);
31813238 goto out_wake_log_root;
....@@ -3185,7 +3242,7 @@
31853242 ret = btrfs_wait_tree_log_extents(log_root_tree,
31863243 EXTENT_NEW | EXTENT_DIRTY);
31873244 if (ret) {
3188
- btrfs_set_log_full_commit(fs_info, trans);
3245
+ btrfs_set_log_full_commit(trans);
31893246 mutex_unlock(&log_root_tree->log_mutex);
31903247 goto out_wake_log_root;
31913248 }
....@@ -3199,7 +3256,7 @@
31993256 mutex_unlock(&log_root_tree->log_mutex);
32003257
32013258 /*
3202
- * nobody else is going to jump in and write the the ctree
3259
+ * Nobody else is going to jump in and write the ctree
32033260 * super here because the log_commit atomic below is protecting
32043261 * us. We must be called with a transaction handle pinning
32053262 * the running transaction open, so a full commit can't hop
....@@ -3207,7 +3264,7 @@
32073264 */
32083265 ret = write_all_supers(fs_info, 1);
32093266 if (ret) {
3210
- btrfs_set_log_full_commit(fs_info, trans);
3267
+ btrfs_set_log_full_commit(trans);
32113268 btrfs_abort_transaction(trans, ret);
32123269 goto out_wake_log_root;
32133270 }
....@@ -3251,8 +3308,6 @@
32513308 struct btrfs_root *log)
32523309 {
32533310 int ret;
3254
- u64 start;
3255
- u64 end;
32563311 struct walk_control wc = {
32573312 .free = 1,
32583313 .process_func = process_one_buffer
....@@ -3266,20 +3321,10 @@
32663321 btrfs_handle_fs_error(log->fs_info, ret, NULL);
32673322 }
32683323
3269
- while (1) {
3270
- ret = find_first_extent_bit(&log->dirty_log_pages,
3271
- 0, &start, &end,
3272
- EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT,
3273
- NULL);
3274
- if (ret)
3275
- break;
3276
-
3277
- clear_extent_bits(&log->dirty_log_pages, start, end,
3278
- EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT);
3279
- }
3280
-
3281
- free_extent_buffer(log->node);
3282
- kfree(log);
3324
+ clear_extent_bits(&log->dirty_log_pages, 0, (u64)-1,
3325
+ EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT);
3326
+ extent_io_tree_release(&log->log_csum_range);
3327
+ btrfs_put_root(log);
32833328 }
32843329
32853330 /*
....@@ -3291,6 +3336,7 @@
32913336 if (root->log_root) {
32923337 free_log_tree(trans, root->log_root);
32933338 root->log_root = NULL;
3339
+ clear_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state);
32943340 }
32953341 return 0;
32963342 }
....@@ -3447,7 +3493,7 @@
34473493 out_unlock:
34483494 mutex_unlock(&dir->log_mutex);
34493495 if (err == -ENOSPC) {
3450
- btrfs_set_log_full_commit(root->fs_info, trans);
3496
+ btrfs_set_log_full_commit(trans);
34513497 err = 0;
34523498 } else if (err < 0 && err != -ENOENT) {
34533499 /* ENOENT can be returned if the entry hasn't been fsynced yet */
....@@ -3465,7 +3511,6 @@
34653511 const char *name, int name_len,
34663512 struct btrfs_inode *inode, u64 dirid)
34673513 {
3468
- struct btrfs_fs_info *fs_info = root->fs_info;
34693514 struct btrfs_root *log;
34703515 u64 index;
34713516 int ret;
....@@ -3483,7 +3528,7 @@
34833528 dirid, &index);
34843529 mutex_unlock(&inode->log_mutex);
34853530 if (ret == -ENOSPC) {
3486
- btrfs_set_log_full_commit(fs_info, trans);
3531
+ btrfs_set_log_full_commit(trans);
34873532 ret = 0;
34883533 } else if (ret < 0 && ret != -ENOENT)
34893534 btrfs_abort_transaction(trans, ret);
....@@ -3807,8 +3852,9 @@
38073852
38083853 found_key.offset = 0;
38093854 found_key.type = 0;
3810
- ret = btrfs_bin_search(path->nodes[0], &found_key, 0,
3811
- &start_slot);
3855
+ ret = btrfs_bin_search(path->nodes[0], &found_key, &start_slot);
3856
+ if (ret < 0)
3857
+ break;
38123858
38133859 ret = btrfs_del_items(trans, log, path, start_slot,
38143860 path->slots[0] - start_slot + 1);
....@@ -3834,7 +3880,7 @@
38343880 {
38353881 struct btrfs_map_token token;
38363882
3837
- btrfs_init_map_token(&token);
3883
+ btrfs_init_map_token(&token, leaf);
38383884
38393885 if (log_inode_only) {
38403886 /* set the generation to zero so the recover code
....@@ -3842,44 +3888,41 @@
38423888 * just to say 'this inode exists' and a logging
38433889 * to say 'update this inode with these values'
38443890 */
3845
- btrfs_set_token_inode_generation(leaf, item, 0, &token);
3846
- btrfs_set_token_inode_size(leaf, item, logged_isize, &token);
3891
+ btrfs_set_token_inode_generation(&token, item, 0);
3892
+ btrfs_set_token_inode_size(&token, item, logged_isize);
38473893 } else {
3848
- btrfs_set_token_inode_generation(leaf, item,
3849
- BTRFS_I(inode)->generation,
3850
- &token);
3851
- btrfs_set_token_inode_size(leaf, item, inode->i_size, &token);
3894
+ btrfs_set_token_inode_generation(&token, item,
3895
+ BTRFS_I(inode)->generation);
3896
+ btrfs_set_token_inode_size(&token, item, inode->i_size);
38523897 }
38533898
3854
- btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
3855
- btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
3856
- btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
3857
- btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
3899
+ btrfs_set_token_inode_uid(&token, item, i_uid_read(inode));
3900
+ btrfs_set_token_inode_gid(&token, item, i_gid_read(inode));
3901
+ btrfs_set_token_inode_mode(&token, item, inode->i_mode);
3902
+ btrfs_set_token_inode_nlink(&token, item, inode->i_nlink);
38583903
3859
- btrfs_set_token_timespec_sec(leaf, &item->atime,
3860
- inode->i_atime.tv_sec, &token);
3861
- btrfs_set_token_timespec_nsec(leaf, &item->atime,
3862
- inode->i_atime.tv_nsec, &token);
3904
+ btrfs_set_token_timespec_sec(&token, &item->atime,
3905
+ inode->i_atime.tv_sec);
3906
+ btrfs_set_token_timespec_nsec(&token, &item->atime,
3907
+ inode->i_atime.tv_nsec);
38633908
3864
- btrfs_set_token_timespec_sec(leaf, &item->mtime,
3865
- inode->i_mtime.tv_sec, &token);
3866
- btrfs_set_token_timespec_nsec(leaf, &item->mtime,
3867
- inode->i_mtime.tv_nsec, &token);
3909
+ btrfs_set_token_timespec_sec(&token, &item->mtime,
3910
+ inode->i_mtime.tv_sec);
3911
+ btrfs_set_token_timespec_nsec(&token, &item->mtime,
3912
+ inode->i_mtime.tv_nsec);
38683913
3869
- btrfs_set_token_timespec_sec(leaf, &item->ctime,
3870
- inode->i_ctime.tv_sec, &token);
3871
- btrfs_set_token_timespec_nsec(leaf, &item->ctime,
3872
- inode->i_ctime.tv_nsec, &token);
3914
+ btrfs_set_token_timespec_sec(&token, &item->ctime,
3915
+ inode->i_ctime.tv_sec);
3916
+ btrfs_set_token_timespec_nsec(&token, &item->ctime,
3917
+ inode->i_ctime.tv_nsec);
38733918
3874
- btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
3875
- &token);
3919
+ btrfs_set_token_inode_nbytes(&token, item, inode_get_bytes(inode));
38763920
3877
- btrfs_set_token_inode_sequence(leaf, item,
3878
- inode_peek_iversion(inode), &token);
3879
- btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
3880
- btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
3881
- btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
3882
- btrfs_set_token_inode_block_group(leaf, item, 0, &token);
3921
+ btrfs_set_token_inode_sequence(&token, item, inode_peek_iversion(inode));
3922
+ btrfs_set_token_inode_transid(&token, item, trans->transid);
3923
+ btrfs_set_token_inode_rdev(&token, item, inode->i_rdev);
3924
+ btrfs_set_token_inode_flags(&token, item, BTRFS_I(inode)->flags);
3925
+ btrfs_set_token_inode_block_group(&token, item, 0);
38833926 }
38843927
38853928 static int log_inode_item(struct btrfs_trans_handle *trans,
....@@ -3902,11 +3945,32 @@
39023945 }
39033946
39043947 static int log_csums(struct btrfs_trans_handle *trans,
3948
+ struct btrfs_inode *inode,
39053949 struct btrfs_root *log_root,
39063950 struct btrfs_ordered_sum *sums)
39073951 {
3952
+ const u64 lock_end = sums->bytenr + sums->len - 1;
3953
+ struct extent_state *cached_state = NULL;
39083954 int ret;
39093955
3956
+ /*
3957
+ * If this inode was not used for reflink operations in the current
3958
+ * transaction with new extents, then do the fast path, no need to
3959
+ * worry about logging checksum items with overlapping ranges.
3960
+ */
3961
+ if (inode->last_reflink_trans < trans->transid)
3962
+ return btrfs_csum_file_blocks(trans, log_root, sums);
3963
+
3964
+ /*
3965
+ * Serialize logging for checksums. This is to avoid racing with the
3966
+ * same checksum being logged by another task that is logging another
3967
+ * file which happens to refer to the same extent as well. Such races
3968
+ * can leave checksum items in the log with overlapping ranges.
3969
+ */
3970
+ ret = lock_extent_bits(&log_root->log_csum_range, sums->bytenr,
3971
+ lock_end, &cached_state);
3972
+ if (ret)
3973
+ return ret;
39103974 /*
39113975 * Due to extent cloning, we might have logged a csum item that covers a
39123976 * subrange of a cloned extent, and later we can end up logging a csum
....@@ -3917,10 +3981,13 @@
39173981 * trim and adjust) any existing csum items in the log for this range.
39183982 */
39193983 ret = btrfs_del_csums(trans, log_root, sums->bytenr, sums->len);
3920
- if (ret)
3921
- return ret;
3984
+ if (!ret)
3985
+ ret = btrfs_csum_file_blocks(trans, log_root, sums);
39223986
3923
- return btrfs_csum_file_blocks(trans, log_root, sums);
3987
+ unlock_extent_cached(&log_root->log_csum_range, sums->bytenr, lock_end,
3988
+ &cached_state);
3989
+
3990
+ return ret;
39243991 }
39253992
39263993 static noinline int copy_items(struct btrfs_trans_handle *trans,
....@@ -4041,7 +4108,7 @@
40414108 struct btrfs_ordered_sum,
40424109 list);
40434110 if (!ret)
4044
- ret = log_csums(trans, log, sums);
4111
+ ret = log_csums(trans, inode, log, sums);
40454112 list_del(&sums->list);
40464113 kfree(sums);
40474114 }
....@@ -4066,10 +4133,14 @@
40664133 static int log_extent_csums(struct btrfs_trans_handle *trans,
40674134 struct btrfs_inode *inode,
40684135 struct btrfs_root *log_root,
4069
- const struct extent_map *em)
4136
+ const struct extent_map *em,
4137
+ struct btrfs_log_ctx *ctx)
40704138 {
4139
+ struct btrfs_ordered_extent *ordered;
40714140 u64 csum_offset;
40724141 u64 csum_len;
4142
+ u64 mod_start = em->mod_start;
4143
+ u64 mod_len = em->mod_len;
40734144 LIST_HEAD(ordered_sums);
40744145 int ret = 0;
40754146
....@@ -4078,13 +4149,71 @@
40784149 em->block_start == EXTENT_MAP_HOLE)
40794150 return 0;
40804151
4152
+ list_for_each_entry(ordered, &ctx->ordered_extents, log_list) {
4153
+ const u64 ordered_end = ordered->file_offset + ordered->num_bytes;
4154
+ const u64 mod_end = mod_start + mod_len;
4155
+ struct btrfs_ordered_sum *sums;
4156
+
4157
+ if (mod_len == 0)
4158
+ break;
4159
+
4160
+ if (ordered_end <= mod_start)
4161
+ continue;
4162
+ if (mod_end <= ordered->file_offset)
4163
+ break;
4164
+
4165
+ /*
4166
+ * We are going to copy all the csums on this ordered extent, so
4167
+ * go ahead and adjust mod_start and mod_len in case this ordered
4168
+ * extent has already been logged.
4169
+ */
4170
+ if (ordered->file_offset > mod_start) {
4171
+ if (ordered_end >= mod_end)
4172
+ mod_len = ordered->file_offset - mod_start;
4173
+ /*
4174
+ * If we have this case
4175
+ *
4176
+ * |--------- logged extent ---------|
4177
+ * |----- ordered extent ----|
4178
+ *
4179
+ * Just don't mess with mod_start and mod_len, we'll
4180
+ * just end up logging more csums than we need and it
4181
+ * will be ok.
4182
+ */
4183
+ } else {
4184
+ if (ordered_end < mod_end) {
4185
+ mod_len = mod_end - ordered_end;
4186
+ mod_start = ordered_end;
4187
+ } else {
4188
+ mod_len = 0;
4189
+ }
4190
+ }
4191
+
4192
+ /*
4193
+ * To keep us from looping for the above case of an ordered
4194
+ * extent that falls inside of the logged extent.
4195
+ */
4196
+ if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM, &ordered->flags))
4197
+ continue;
4198
+
4199
+ list_for_each_entry(sums, &ordered->list, list) {
4200
+ ret = log_csums(trans, inode, log_root, sums);
4201
+ if (ret)
4202
+ return ret;
4203
+ }
4204
+ }
4205
+
4206
+ /* We're done, found all csums in the ordered extents. */
4207
+ if (mod_len == 0)
4208
+ return 0;
4209
+
40814210 /* If we're compressed we have to save the entire range of csums. */
40824211 if (em->compress_type) {
40834212 csum_offset = 0;
40844213 csum_len = max(em->block_len, em->orig_block_len);
40854214 } else {
4086
- csum_offset = em->mod_start - em->start;
4087
- csum_len = em->mod_len;
4215
+ csum_offset = mod_start - em->start;
4216
+ csum_len = mod_len;
40884217 }
40894218
40904219 /* block start is already adjusted for the file extent offset. */
....@@ -4100,7 +4229,7 @@
41004229 struct btrfs_ordered_sum,
41014230 list);
41024231 if (!ret)
4103
- ret = log_csums(trans, log_root, sums);
4232
+ ret = log_csums(trans, inode, log_root, sums);
41044233 list_del(&sums->list);
41054234 kfree(sums);
41064235 }
....@@ -4124,13 +4253,11 @@
41244253 int ret;
41254254 int extent_inserted = 0;
41264255
4127
- ret = log_extent_csums(trans, inode, log, em);
4256
+ ret = log_extent_csums(trans, inode, log, em, ctx);
41284257 if (ret)
41294258 return ret;
41304259
4131
- btrfs_init_map_token(&token);
4132
-
4133
- ret = __btrfs_drop_extents(trans, log, &inode->vfs_inode, path, em->start,
4260
+ ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
41344261 em->start + em->len, NULL, 0, 1,
41354262 sizeof(*fi), &extent_inserted);
41364263 if (ret)
....@@ -4147,46 +4274,39 @@
41474274 return ret;
41484275 }
41494276 leaf = path->nodes[0];
4277
+ btrfs_init_map_token(&token, leaf);
41504278 fi = btrfs_item_ptr(leaf, path->slots[0],
41514279 struct btrfs_file_extent_item);
41524280
4153
- btrfs_set_token_file_extent_generation(leaf, fi, trans->transid,
4154
- &token);
4281
+ btrfs_set_token_file_extent_generation(&token, fi, trans->transid);
41554282 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
4156
- btrfs_set_token_file_extent_type(leaf, fi,
4157
- BTRFS_FILE_EXTENT_PREALLOC,
4158
- &token);
4283
+ btrfs_set_token_file_extent_type(&token, fi,
4284
+ BTRFS_FILE_EXTENT_PREALLOC);
41594285 else
4160
- btrfs_set_token_file_extent_type(leaf, fi,
4161
- BTRFS_FILE_EXTENT_REG,
4162
- &token);
4286
+ btrfs_set_token_file_extent_type(&token, fi,
4287
+ BTRFS_FILE_EXTENT_REG);
41634288
41644289 block_len = max(em->block_len, em->orig_block_len);
41654290 if (em->compress_type != BTRFS_COMPRESS_NONE) {
4166
- btrfs_set_token_file_extent_disk_bytenr(leaf, fi,
4167
- em->block_start,
4168
- &token);
4169
- btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len,
4170
- &token);
4291
+ btrfs_set_token_file_extent_disk_bytenr(&token, fi,
4292
+ em->block_start);
4293
+ btrfs_set_token_file_extent_disk_num_bytes(&token, fi, block_len);
41714294 } else if (em->block_start < EXTENT_MAP_LAST_BYTE) {
4172
- btrfs_set_token_file_extent_disk_bytenr(leaf, fi,
4295
+ btrfs_set_token_file_extent_disk_bytenr(&token, fi,
41734296 em->block_start -
4174
- extent_offset, &token);
4175
- btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len,
4176
- &token);
4297
+ extent_offset);
4298
+ btrfs_set_token_file_extent_disk_num_bytes(&token, fi, block_len);
41774299 } else {
4178
- btrfs_set_token_file_extent_disk_bytenr(leaf, fi, 0, &token);
4179
- btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, 0,
4180
- &token);
4300
+ btrfs_set_token_file_extent_disk_bytenr(&token, fi, 0);
4301
+ btrfs_set_token_file_extent_disk_num_bytes(&token, fi, 0);
41814302 }
41824303
4183
- btrfs_set_token_file_extent_offset(leaf, fi, extent_offset, &token);
4184
- btrfs_set_token_file_extent_num_bytes(leaf, fi, em->len, &token);
4185
- btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->ram_bytes, &token);
4186
- btrfs_set_token_file_extent_compression(leaf, fi, em->compress_type,
4187
- &token);
4188
- btrfs_set_token_file_extent_encryption(leaf, fi, 0, &token);
4189
- btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token);
4304
+ btrfs_set_token_file_extent_offset(&token, fi, extent_offset);
4305
+ btrfs_set_token_file_extent_num_bytes(&token, fi, em->len);
4306
+ btrfs_set_token_file_extent_ram_bytes(&token, fi, em->ram_bytes);
4307
+ btrfs_set_token_file_extent_compression(&token, fi, em->compress_type);
4308
+ btrfs_set_token_file_extent_encryption(&token, fi, 0);
4309
+ btrfs_set_token_file_extent_other_encoding(&token, fi, 0);
41904310 btrfs_mark_buffer_dirty(leaf);
41914311
41924312 btrfs_release_path(path);
....@@ -4196,7 +4316,7 @@
41964316
41974317 /*
41984318 * Log all prealloc extents beyond the inode's i_size to make sure we do not
4199
- * lose them after doing a fast fsync and replaying the log. We scan the
4319
+ * lose them after doing a full/fast fsync and replaying the log. We scan the
42004320 * subvolume's root instead of iterating the inode's extent map tree because
42014321 * otherwise we can log incorrect extent items based on extent map conversion.
42024322 * That can happen due to the fact that extent maps are merged when they
....@@ -4322,12 +4442,9 @@
43224442 }
43234443 }
43244444 }
4325
- if (ins_nr > 0) {
4445
+ if (ins_nr > 0)
43264446 ret = copy_items(trans, inode, dst_path, path,
43274447 start_slot, ins_nr, 1, 0);
4328
- if (ret > 0)
4329
- ret = 0;
4330
- }
43314448 out:
43324449 btrfs_release_path(path);
43334450 btrfs_free_path(dst_path);
....@@ -4338,14 +4455,13 @@
43384455 struct btrfs_root *root,
43394456 struct btrfs_inode *inode,
43404457 struct btrfs_path *path,
4341
- struct btrfs_log_ctx *ctx,
4342
- const u64 start,
4343
- const u64 end)
4458
+ struct btrfs_log_ctx *ctx)
43444459 {
4460
+ struct btrfs_ordered_extent *ordered;
4461
+ struct btrfs_ordered_extent *tmp;
43454462 struct extent_map *em, *n;
43464463 struct list_head extents;
43474464 struct extent_map_tree *tree = &inode->extent_tree;
4348
- u64 logged_start, logged_end;
43494465 u64 test_gen;
43504466 int ret = 0;
43514467 int num = 0;
....@@ -4354,27 +4470,8 @@
43544470
43554471 write_lock(&tree->lock);
43564472 test_gen = root->fs_info->last_trans_committed;
4357
- logged_start = start;
4358
- logged_end = end;
43594473
43604474 list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
4361
- /*
4362
- * Skip extents outside our logging range. It's important to do
4363
- * it for correctness because if we don't ignore them, we may
4364
- * log them before their ordered extent completes, and therefore
4365
- * we could log them without logging their respective checksums
4366
- * (the checksum items are added to the csum tree at the very
4367
- * end of btrfs_finish_ordered_io()). Also leave such extents
4368
- * outside of our range in the list, since we may have another
4369
- * ranged fsync in the near future that needs them. If an extent
4370
- * outside our range corresponds to a hole, log it to avoid
4371
- * leaving gaps between extents (fsck will complain when we are
4372
- * not using the NO_HOLES feature).
4373
- */
4374
- if ((em->start > end || em->start + em->len <= start) &&
4375
- em->block_start != EXTENT_MAP_HOLE)
4376
- continue;
4377
-
43784475 list_del_init(&em->list);
43794476 /*
43804477 * Just an arbitrary number, this can be really CPU intensive
....@@ -4395,11 +4492,6 @@
43954492 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) &&
43964493 em->start >= i_size_read(&inode->vfs_inode))
43974494 continue;
4398
-
4399
- if (em->start < logged_start)
4400
- logged_start = em->start;
4401
- if ((em->start + em->len - 1) > logged_end)
4402
- logged_end = em->start + em->len - 1;
44034495
44044496 /* Need a ref to keep it from getting evicted from cache */
44054497 refcount_inc(&em->refs);
....@@ -4438,8 +4530,32 @@
44384530 btrfs_release_path(path);
44394531 if (!ret)
44404532 ret = btrfs_log_prealloc_extents(trans, inode, path);
4533
+ if (ret)
4534
+ return ret;
44414535
4442
- return ret;
4536
+ /*
4537
+ * We have logged all extents successfully, now make sure the commit of
4538
+ * the current transaction waits for the ordered extents to complete
4539
+ * before it commits and wipes out the log trees, otherwise we would
4540
+ * lose data if an ordered extent completes after the transaction
4541
+ * commits and a power failure happens after the transaction commit.
4542
+ */
4543
+ list_for_each_entry_safe(ordered, tmp, &ctx->ordered_extents, log_list) {
4544
+ list_del_init(&ordered->log_list);
4545
+ set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags);
4546
+
4547
+ if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
4548
+ spin_lock_irq(&inode->ordered_tree.lock);
4549
+ if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
4550
+ set_bit(BTRFS_ORDERED_PENDING, &ordered->flags);
4551
+ atomic_inc(&trans->transaction->pending_ordered);
4552
+ }
4553
+ spin_unlock_irq(&inode->ordered_tree.lock);
4554
+ }
4555
+ btrfs_put_ordered_extent(ordered);
4556
+ }
4557
+
4558
+ return 0;
44434559 }
44444560
44454561 static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode,
....@@ -4502,6 +4618,10 @@
45024618 const u64 ino = btrfs_ino(inode);
45034619 int ins_nr = 0;
45044620 int start_slot = 0;
4621
+ bool found_xattrs = false;
4622
+
4623
+ if (test_bit(BTRFS_INODE_NO_XATTRS, &inode->runtime_flags))
4624
+ return 0;
45054625
45064626 key.objectid = ino;
45074627 key.type = BTRFS_XATTR_ITEM_KEY;
....@@ -4540,6 +4660,7 @@
45404660 start_slot = slot;
45414661 ins_nr++;
45424662 path->slots[0]++;
4663
+ found_xattrs = true;
45434664 cond_resched();
45444665 }
45454666 if (ins_nr > 0) {
....@@ -4548,6 +4669,9 @@
45484669 if (ret < 0)
45494670 return ret;
45504671 }
4672
+
4673
+ if (!found_xattrs)
4674
+ set_bit(BTRFS_INODE_NO_XATTRS, &inode->runtime_flags);
45514675
45524676 return 0;
45534677 }
....@@ -4585,9 +4709,7 @@
45854709 return ret;
45864710
45874711 while (true) {
4588
- struct btrfs_file_extent_item *extent;
45894712 struct extent_buffer *leaf = path->nodes[0];
4590
- u64 len;
45914713
45924714 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
45934715 ret = btrfs_next_leaf(root, path);
....@@ -4636,18 +4758,7 @@
46364758 leaf = path->nodes[0];
46374759 }
46384760
4639
- extent = btrfs_item_ptr(leaf, path->slots[0],
4640
- struct btrfs_file_extent_item);
4641
- if (btrfs_file_extent_type(leaf, extent) ==
4642
- BTRFS_FILE_EXTENT_INLINE) {
4643
- len = btrfs_file_extent_ram_bytes(leaf, extent);
4644
- prev_extent_end = ALIGN(key.offset + len,
4645
- fs_info->sectorsize);
4646
- } else {
4647
- len = btrfs_file_extent_num_bytes(leaf, extent);
4648
- prev_extent_end = key.offset + len;
4649
- }
4650
-
4761
+ prev_extent_end = btrfs_file_extent_end(path);
46514762 path->slots[0]++;
46524763 cond_resched();
46534764 }
....@@ -4714,7 +4825,7 @@
47144825 const int slot,
47154826 const struct btrfs_key *key,
47164827 struct btrfs_inode *inode,
4717
- u64 *other_ino)
4828
+ u64 *other_ino, u64 *other_parent)
47184829 {
47194830 int ret;
47204831 struct btrfs_path *search_path;
....@@ -4777,8 +4888,13 @@
47774888 btrfs_dir_item_key_to_cpu(search_path->nodes[0],
47784889 di, &di_key);
47794890 if (di_key.type == BTRFS_INODE_ITEM_KEY) {
4780
- ret = 1;
4781
- *other_ino = di_key.objectid;
4891
+ if (di_key.objectid != key->objectid) {
4892
+ ret = 1;
4893
+ *other_ino = di_key.objectid;
4894
+ *other_parent = parent;
4895
+ } else {
4896
+ ret = 0;
4897
+ }
47824898 } else {
47834899 ret = -EAGAIN;
47844900 }
....@@ -4795,6 +4911,334 @@
47954911 out:
47964912 btrfs_free_path(search_path);
47974913 kfree(name);
4914
+ return ret;
4915
+}
4916
+
4917
+struct btrfs_ino_list {
4918
+ u64 ino;
4919
+ u64 parent;
4920
+ struct list_head list;
4921
+};
4922
+
4923
+static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
4924
+ struct btrfs_root *root,
4925
+ struct btrfs_path *path,
4926
+ struct btrfs_log_ctx *ctx,
4927
+ u64 ino, u64 parent)
4928
+{
4929
+ struct btrfs_ino_list *ino_elem;
4930
+ LIST_HEAD(inode_list);
4931
+ int ret = 0;
4932
+
4933
+ ino_elem = kmalloc(sizeof(*ino_elem), GFP_NOFS);
4934
+ if (!ino_elem)
4935
+ return -ENOMEM;
4936
+ ino_elem->ino = ino;
4937
+ ino_elem->parent = parent;
4938
+ list_add_tail(&ino_elem->list, &inode_list);
4939
+
4940
+ while (!list_empty(&inode_list)) {
4941
+ struct btrfs_fs_info *fs_info = root->fs_info;
4942
+ struct btrfs_key key;
4943
+ struct inode *inode;
4944
+
4945
+ ino_elem = list_first_entry(&inode_list, struct btrfs_ino_list,
4946
+ list);
4947
+ ino = ino_elem->ino;
4948
+ parent = ino_elem->parent;
4949
+ list_del(&ino_elem->list);
4950
+ kfree(ino_elem);
4951
+ if (ret)
4952
+ continue;
4953
+
4954
+ btrfs_release_path(path);
4955
+
4956
+ inode = btrfs_iget(fs_info->sb, ino, root);
4957
+ /*
4958
+ * If the other inode that had a conflicting dir entry was
4959
+ * deleted in the current transaction, we need to log its parent
4960
+ * directory.
4961
+ */
4962
+ if (IS_ERR(inode)) {
4963
+ ret = PTR_ERR(inode);
4964
+ if (ret == -ENOENT) {
4965
+ inode = btrfs_iget(fs_info->sb, parent, root);
4966
+ if (IS_ERR(inode)) {
4967
+ ret = PTR_ERR(inode);
4968
+ } else {
4969
+ ret = btrfs_log_inode(trans, root,
4970
+ BTRFS_I(inode),
4971
+ LOG_OTHER_INODE_ALL,
4972
+ ctx);
4973
+ btrfs_add_delayed_iput(inode);
4974
+ }
4975
+ }
4976
+ continue;
4977
+ }
4978
+ /*
4979
+ * If the inode was already logged skip it - otherwise we can
4980
+ * hit an infinite loop. Example:
4981
+ *
4982
+ * From the commit root (previous transaction) we have the
4983
+ * following inodes:
4984
+ *
4985
+ * inode 257 a directory
4986
+ * inode 258 with references "zz" and "zz_link" on inode 257
4987
+ * inode 259 with reference "a" on inode 257
4988
+ *
4989
+ * And in the current (uncommitted) transaction we have:
4990
+ *
4991
+ * inode 257 a directory, unchanged
4992
+ * inode 258 with references "a" and "a2" on inode 257
4993
+ * inode 259 with reference "zz_link" on inode 257
4994
+ * inode 261 with reference "zz" on inode 257
4995
+ *
4996
+ * When logging inode 261 the following infinite loop could
4997
+ * happen if we don't skip already logged inodes:
4998
+ *
4999
+ * - we detect inode 258 as a conflicting inode, with inode 261
5000
+ * on reference "zz", and log it;
5001
+ *
5002
+ * - we detect inode 259 as a conflicting inode, with inode 258
5003
+ * on reference "a", and log it;
5004
+ *
5005
+ * - we detect inode 258 as a conflicting inode, with inode 259
5006
+ * on reference "zz_link", and log it - again! After this we
5007
+ * repeat the above steps forever.
5008
+ */
5009
+ spin_lock(&BTRFS_I(inode)->lock);
5010
+ /*
5011
+ * Check the inode's logged_trans only instead of
5012
+ * btrfs_inode_in_log(). This is because the last_log_commit of
5013
+ * the inode is not updated when we only log that it exists and
5014
+ * it has the full sync bit set (see btrfs_log_inode()).
5015
+ */
5016
+ if (BTRFS_I(inode)->logged_trans == trans->transid) {
5017
+ spin_unlock(&BTRFS_I(inode)->lock);
5018
+ btrfs_add_delayed_iput(inode);
5019
+ continue;
5020
+ }
5021
+ spin_unlock(&BTRFS_I(inode)->lock);
5022
+ /*
5023
+ * We are safe logging the other inode without acquiring its
5024
+ * lock as long as we log with the LOG_INODE_EXISTS mode. We
5025
+ * are safe against concurrent renames of the other inode as
5026
+ * well because during a rename we pin the log and update the
5027
+ * log with the new name before we unpin it.
5028
+ */
5029
+ ret = btrfs_log_inode(trans, root, BTRFS_I(inode),
5030
+ LOG_OTHER_INODE, ctx);
5031
+ if (ret) {
5032
+ btrfs_add_delayed_iput(inode);
5033
+ continue;
5034
+ }
5035
+
5036
+ key.objectid = ino;
5037
+ key.type = BTRFS_INODE_REF_KEY;
5038
+ key.offset = 0;
5039
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5040
+ if (ret < 0) {
5041
+ btrfs_add_delayed_iput(inode);
5042
+ continue;
5043
+ }
5044
+
5045
+ while (true) {
5046
+ struct extent_buffer *leaf = path->nodes[0];
5047
+ int slot = path->slots[0];
5048
+ u64 other_ino = 0;
5049
+ u64 other_parent = 0;
5050
+
5051
+ if (slot >= btrfs_header_nritems(leaf)) {
5052
+ ret = btrfs_next_leaf(root, path);
5053
+ if (ret < 0) {
5054
+ break;
5055
+ } else if (ret > 0) {
5056
+ ret = 0;
5057
+ break;
5058
+ }
5059
+ continue;
5060
+ }
5061
+
5062
+ btrfs_item_key_to_cpu(leaf, &key, slot);
5063
+ if (key.objectid != ino ||
5064
+ (key.type != BTRFS_INODE_REF_KEY &&
5065
+ key.type != BTRFS_INODE_EXTREF_KEY)) {
5066
+ ret = 0;
5067
+ break;
5068
+ }
5069
+
5070
+ ret = btrfs_check_ref_name_override(leaf, slot, &key,
5071
+ BTRFS_I(inode), &other_ino,
5072
+ &other_parent);
5073
+ if (ret < 0)
5074
+ break;
5075
+ if (ret > 0) {
5076
+ ino_elem = kmalloc(sizeof(*ino_elem), GFP_NOFS);
5077
+ if (!ino_elem) {
5078
+ ret = -ENOMEM;
5079
+ break;
5080
+ }
5081
+ ino_elem->ino = other_ino;
5082
+ ino_elem->parent = other_parent;
5083
+ list_add_tail(&ino_elem->list, &inode_list);
5084
+ ret = 0;
5085
+ }
5086
+ path->slots[0]++;
5087
+ }
5088
+ btrfs_add_delayed_iput(inode);
5089
+ }
5090
+
5091
+ return ret;
5092
+}
5093
+
5094
+static int copy_inode_items_to_log(struct btrfs_trans_handle *trans,
5095
+ struct btrfs_inode *inode,
5096
+ struct btrfs_key *min_key,
5097
+ const struct btrfs_key *max_key,
5098
+ struct btrfs_path *path,
5099
+ struct btrfs_path *dst_path,
5100
+ const u64 logged_isize,
5101
+ const bool recursive_logging,
5102
+ const int inode_only,
5103
+ struct btrfs_log_ctx *ctx,
5104
+ bool *need_log_inode_item)
5105
+{
5106
+ const u64 i_size = i_size_read(&inode->vfs_inode);
5107
+ struct btrfs_root *root = inode->root;
5108
+ int ins_start_slot = 0;
5109
+ int ins_nr = 0;
5110
+ int ret;
5111
+
5112
+ while (1) {
5113
+ ret = btrfs_search_forward(root, min_key, path, trans->transid);
5114
+ if (ret < 0)
5115
+ return ret;
5116
+ if (ret > 0) {
5117
+ ret = 0;
5118
+ break;
5119
+ }
5120
+again:
5121
+ /* Note, ins_nr might be > 0 here, cleanup outside the loop */
5122
+ if (min_key->objectid != max_key->objectid)
5123
+ break;
5124
+ if (min_key->type > max_key->type)
5125
+ break;
5126
+
5127
+ if (min_key->type == BTRFS_INODE_ITEM_KEY) {
5128
+ *need_log_inode_item = false;
5129
+ } else if (min_key->type == BTRFS_EXTENT_DATA_KEY &&
5130
+ min_key->offset >= i_size) {
5131
+ /*
5132
+ * Extents at and beyond eof are logged with
5133
+ * btrfs_log_prealloc_extents().
5134
+ * Only regular files have BTRFS_EXTENT_DATA_KEY keys,
5135
+ * and no keys greater than that, so bail out.
5136
+ */
5137
+ break;
5138
+ } else if ((min_key->type == BTRFS_INODE_REF_KEY ||
5139
+ min_key->type == BTRFS_INODE_EXTREF_KEY) &&
5140
+ inode->generation == trans->transid &&
5141
+ !recursive_logging) {
5142
+ u64 other_ino = 0;
5143
+ u64 other_parent = 0;
5144
+
5145
+ ret = btrfs_check_ref_name_override(path->nodes[0],
5146
+ path->slots[0], min_key, inode,
5147
+ &other_ino, &other_parent);
5148
+ if (ret < 0) {
5149
+ return ret;
5150
+ } else if (ret > 0 && ctx &&
5151
+ other_ino != btrfs_ino(BTRFS_I(ctx->inode))) {
5152
+ if (ins_nr > 0) {
5153
+ ins_nr++;
5154
+ } else {
5155
+ ins_nr = 1;
5156
+ ins_start_slot = path->slots[0];
5157
+ }
5158
+ ret = copy_items(trans, inode, dst_path, path,
5159
+ ins_start_slot, ins_nr,
5160
+ inode_only, logged_isize);
5161
+ if (ret < 0)
5162
+ return ret;
5163
+ ins_nr = 0;
5164
+
5165
+ ret = log_conflicting_inodes(trans, root, path,
5166
+ ctx, other_ino, other_parent);
5167
+ if (ret)
5168
+ return ret;
5169
+ btrfs_release_path(path);
5170
+ goto next_key;
5171
+ }
5172
+ } else if (min_key->type == BTRFS_XATTR_ITEM_KEY) {
5173
+ /* Skip xattrs, logged later with btrfs_log_all_xattrs() */
5174
+ if (ins_nr == 0)
5175
+ goto next_slot;
5176
+ ret = copy_items(trans, inode, dst_path, path,
5177
+ ins_start_slot,
5178
+ ins_nr, inode_only, logged_isize);
5179
+ if (ret < 0)
5180
+ return ret;
5181
+ ins_nr = 0;
5182
+ goto next_slot;
5183
+ }
5184
+
5185
+ if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
5186
+ ins_nr++;
5187
+ goto next_slot;
5188
+ } else if (!ins_nr) {
5189
+ ins_start_slot = path->slots[0];
5190
+ ins_nr = 1;
5191
+ goto next_slot;
5192
+ }
5193
+
5194
+ ret = copy_items(trans, inode, dst_path, path, ins_start_slot,
5195
+ ins_nr, inode_only, logged_isize);
5196
+ if (ret < 0)
5197
+ return ret;
5198
+ ins_nr = 1;
5199
+ ins_start_slot = path->slots[0];
5200
+next_slot:
5201
+ path->slots[0]++;
5202
+ if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) {
5203
+ btrfs_item_key_to_cpu(path->nodes[0], min_key,
5204
+ path->slots[0]);
5205
+ goto again;
5206
+ }
5207
+ if (ins_nr) {
5208
+ ret = copy_items(trans, inode, dst_path, path,
5209
+ ins_start_slot, ins_nr, inode_only,
5210
+ logged_isize);
5211
+ if (ret < 0)
5212
+ return ret;
5213
+ ins_nr = 0;
5214
+ }
5215
+ btrfs_release_path(path);
5216
+next_key:
5217
+ if (min_key->offset < (u64)-1) {
5218
+ min_key->offset++;
5219
+ } else if (min_key->type < max_key->type) {
5220
+ min_key->type++;
5221
+ min_key->offset = 0;
5222
+ } else {
5223
+ break;
5224
+ }
5225
+ }
5226
+ if (ins_nr) {
5227
+ ret = copy_items(trans, inode, dst_path, path, ins_start_slot,
5228
+ ins_nr, inode_only, logged_isize);
5229
+ if (ret)
5230
+ return ret;
5231
+ }
5232
+
5233
+ if (inode_only == LOG_INODE_ALL && S_ISREG(inode->vfs_inode.i_mode)) {
5234
+ /*
5235
+ * Release the path because otherwise we might attempt to double
5236
+ * lock the same leaf with btrfs_log_prealloc_extents() below.
5237
+ */
5238
+ btrfs_release_path(path);
5239
+ ret = btrfs_log_prealloc_extents(trans, inode, dst_path);
5240
+ }
5241
+
47985242 return ret;
47995243 }
48005244
....@@ -4815,27 +5259,22 @@
48155259 static int btrfs_log_inode(struct btrfs_trans_handle *trans,
48165260 struct btrfs_root *root, struct btrfs_inode *inode,
48175261 int inode_only,
4818
- const loff_t start,
4819
- const loff_t end,
48205262 struct btrfs_log_ctx *ctx)
48215263 {
4822
- struct btrfs_fs_info *fs_info = root->fs_info;
48235264 struct btrfs_path *path;
48245265 struct btrfs_path *dst_path;
48255266 struct btrfs_key min_key;
48265267 struct btrfs_key max_key;
48275268 struct btrfs_root *log = root->log_root;
48285269 int err = 0;
4829
- int ret;
4830
- int nritems;
4831
- int ins_start_slot = 0;
4832
- int ins_nr;
5270
+ int ret = 0;
48335271 bool fast_search = false;
48345272 u64 ino = btrfs_ino(inode);
48355273 struct extent_map_tree *em_tree = &inode->extent_tree;
48365274 u64 logged_isize = 0;
48375275 bool need_log_inode_item = true;
48385276 bool xattrs_logged = false;
5277
+ bool recursive_logging = false;
48395278
48405279 path = btrfs_alloc_path();
48415280 if (!path)
....@@ -4864,15 +5303,19 @@
48645303 max_key.offset = (u64)-1;
48655304
48665305 /*
4867
- * Only run delayed items if we are a dir or a new file.
4868
- * Otherwise commit the delayed inode only, which is needed in
4869
- * order for the log replay code to mark inodes for link count
4870
- * fixup (create temporary BTRFS_TREE_LOG_FIXUP_OBJECTID items).
5306
+ * Only run delayed items if we are a directory. We want to make sure
5307
+ * all directory indexes hit the fs/subvolume tree so we can find them
5308
+ * and figure out which index ranges have to be logged.
5309
+ *
5310
+ * Otherwise commit the delayed inode only if the full sync flag is set,
5311
+ * as we want to make sure an up to date version is in the subvolume
5312
+ * tree so copy_inode_items_to_log() / copy_items() can find it and copy
5313
+ * it to the log tree. For a non full sync, we always log the inode item
5314
+ * based on the in-memory struct btrfs_inode which is always up to date.
48715315 */
4872
- if (S_ISDIR(inode->vfs_inode.i_mode) ||
4873
- inode->generation > fs_info->last_trans_committed)
5316
+ if (S_ISDIR(inode->vfs_inode.i_mode))
48745317 ret = btrfs_commit_inode_delayed_items(trans, inode);
4875
- else
5318
+ else if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags))
48765319 ret = btrfs_commit_inode_delayed_inode(inode);
48775320
48785321 if (ret) {
....@@ -4881,12 +5324,28 @@
48815324 return ret;
48825325 }
48835326
4884
- if (inode_only == LOG_OTHER_INODE) {
4885
- inode_only = LOG_INODE_EXISTS;
5327
+ if (inode_only == LOG_OTHER_INODE || inode_only == LOG_OTHER_INODE_ALL) {
5328
+ recursive_logging = true;
5329
+ if (inode_only == LOG_OTHER_INODE)
5330
+ inode_only = LOG_INODE_EXISTS;
5331
+ else
5332
+ inode_only = LOG_INODE_ALL;
48865333 mutex_lock_nested(&inode->log_mutex, SINGLE_DEPTH_NESTING);
48875334 } else {
48885335 mutex_lock(&inode->log_mutex);
48895336 }
5337
+
5338
+ /*
5339
+ * For symlinks, we must always log their content, which is stored in an
5340
+ * inline extent, otherwise we could end up with an empty symlink after
5341
+ * log replay, which is invalid on linux (symlink(2) returns -ENOENT if
5342
+ * one attempts to create an empty symlink).
5343
+ * We don't need to worry about flushing delalloc, because when we create
5344
+ * the inline extent when the symlink is created (we never have delalloc
5345
+ * for symlinks).
5346
+ */
5347
+ if (S_ISLNK(inode->vfs_inode.i_mode))
5348
+ inode_only = LOG_INODE_ALL;
48905349
48915350 /*
48925351 * a brute force approach to making sure we get the most uptodate
....@@ -4955,170 +5414,12 @@
49555414 goto out_unlock;
49565415 }
49575416
4958
- while (1) {
4959
- ins_nr = 0;
4960
- ret = btrfs_search_forward(root, &min_key,
4961
- path, trans->transid);
4962
- if (ret < 0) {
4963
- err = ret;
4964
- goto out_unlock;
4965
- }
4966
- if (ret != 0)
4967
- break;
4968
-again:
4969
- /* note, ins_nr might be > 0 here, cleanup outside the loop */
4970
- if (min_key.objectid != ino)
4971
- break;
4972
- if (min_key.type > max_key.type)
4973
- break;
4974
-
4975
- if (min_key.type == BTRFS_INODE_ITEM_KEY)
4976
- need_log_inode_item = false;
4977
-
4978
- if ((min_key.type == BTRFS_INODE_REF_KEY ||
4979
- min_key.type == BTRFS_INODE_EXTREF_KEY) &&
4980
- inode->generation == trans->transid) {
4981
- u64 other_ino = 0;
4982
-
4983
- ret = btrfs_check_ref_name_override(path->nodes[0],
4984
- path->slots[0], &min_key, inode,
4985
- &other_ino);
4986
- if (ret < 0) {
4987
- err = ret;
4988
- goto out_unlock;
4989
- } else if (ret > 0 && ctx &&
4990
- other_ino != btrfs_ino(BTRFS_I(ctx->inode))) {
4991
- struct btrfs_key inode_key;
4992
- struct inode *other_inode;
4993
-
4994
- if (ins_nr > 0) {
4995
- ins_nr++;
4996
- } else {
4997
- ins_nr = 1;
4998
- ins_start_slot = path->slots[0];
4999
- }
5000
- ret = copy_items(trans, inode, dst_path, path,
5001
- ins_start_slot,
5002
- ins_nr, inode_only,
5003
- logged_isize);
5004
- if (ret < 0) {
5005
- err = ret;
5006
- goto out_unlock;
5007
- }
5008
- ins_nr = 0;
5009
- btrfs_release_path(path);
5010
- inode_key.objectid = other_ino;
5011
- inode_key.type = BTRFS_INODE_ITEM_KEY;
5012
- inode_key.offset = 0;
5013
- other_inode = btrfs_iget(fs_info->sb,
5014
- &inode_key, root,
5015
- NULL);
5016
- /*
5017
- * If the other inode that had a conflicting dir
5018
- * entry was deleted in the current transaction,
5019
- * we don't need to do more work nor fallback to
5020
- * a transaction commit.
5021
- */
5022
- if (other_inode == ERR_PTR(-ENOENT)) {
5023
- goto next_key;
5024
- } else if (IS_ERR(other_inode)) {
5025
- err = PTR_ERR(other_inode);
5026
- goto out_unlock;
5027
- }
5028
- /*
5029
- * We are safe logging the other inode without
5030
- * acquiring its i_mutex as long as we log with
5031
- * the LOG_INODE_EXISTS mode. We're safe against
5032
- * concurrent renames of the other inode as well
5033
- * because during a rename we pin the log and
5034
- * update the log with the new name before we
5035
- * unpin it.
5036
- */
5037
- err = btrfs_log_inode(trans, root,
5038
- BTRFS_I(other_inode),
5039
- LOG_OTHER_INODE, 0, LLONG_MAX,
5040
- ctx);
5041
- btrfs_add_delayed_iput(other_inode);
5042
- if (err)
5043
- goto out_unlock;
5044
- else
5045
- goto next_key;
5046
- }
5047
- }
5048
-
5049
- /* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
5050
- if (min_key.type == BTRFS_XATTR_ITEM_KEY) {
5051
- if (ins_nr == 0)
5052
- goto next_slot;
5053
- ret = copy_items(trans, inode, dst_path, path,
5054
- ins_start_slot,
5055
- ins_nr, inode_only, logged_isize);
5056
- if (ret < 0) {
5057
- err = ret;
5058
- goto out_unlock;
5059
- }
5060
- ins_nr = 0;
5061
- goto next_slot;
5062
- }
5063
-
5064
- if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
5065
- ins_nr++;
5066
- goto next_slot;
5067
- } else if (!ins_nr) {
5068
- ins_start_slot = path->slots[0];
5069
- ins_nr = 1;
5070
- goto next_slot;
5071
- }
5072
-
5073
- ret = copy_items(trans, inode, dst_path, path,
5074
- ins_start_slot, ins_nr, inode_only,
5075
- logged_isize);
5076
- if (ret < 0) {
5077
- err = ret;
5078
- goto out_unlock;
5079
- }
5080
- ins_nr = 1;
5081
- ins_start_slot = path->slots[0];
5082
-next_slot:
5083
-
5084
- nritems = btrfs_header_nritems(path->nodes[0]);
5085
- path->slots[0]++;
5086
- if (path->slots[0] < nritems) {
5087
- btrfs_item_key_to_cpu(path->nodes[0], &min_key,
5088
- path->slots[0]);
5089
- goto again;
5090
- }
5091
- if (ins_nr) {
5092
- ret = copy_items(trans, inode, dst_path, path,
5093
- ins_start_slot,
5094
- ins_nr, inode_only, logged_isize);
5095
- if (ret < 0) {
5096
- err = ret;
5097
- goto out_unlock;
5098
- }
5099
- ins_nr = 0;
5100
- }
5101
- btrfs_release_path(path);
5102
-next_key:
5103
- if (min_key.offset < (u64)-1) {
5104
- min_key.offset++;
5105
- } else if (min_key.type < max_key.type) {
5106
- min_key.type++;
5107
- min_key.offset = 0;
5108
- } else {
5109
- break;
5110
- }
5111
- }
5112
- if (ins_nr) {
5113
- ret = copy_items(trans, inode, dst_path, path,
5114
- ins_start_slot, ins_nr, inode_only,
5115
- logged_isize);
5116
- if (ret < 0) {
5117
- err = ret;
5118
- goto out_unlock;
5119
- }
5120
- ins_nr = 0;
5121
- }
5417
+ err = copy_inode_items_to_log(trans, inode, &min_key, &max_key,
5418
+ path, dst_path, logged_isize,
5419
+ recursive_logging, inode_only, ctx,
5420
+ &need_log_inode_item);
5421
+ if (err)
5422
+ goto out_unlock;
51225423
51235424 btrfs_release_path(path);
51245425 btrfs_release_path(dst_path);
....@@ -5148,7 +5449,7 @@
51485449 }
51495450 if (fast_search) {
51505451 ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
5151
- ctx, start, end);
5452
+ ctx);
51525453 if (ret) {
51535454 err = ret;
51545455 goto out_unlock;
....@@ -5157,31 +5458,8 @@
51575458 struct extent_map *em, *n;
51585459
51595460 write_lock(&em_tree->lock);
5160
- /*
5161
- * We can't just remove every em if we're called for a ranged
5162
- * fsync - that is, one that doesn't cover the whole possible
5163
- * file range (0 to LLONG_MAX). This is because we can have
5164
- * em's that fall outside the range we're logging and therefore
5165
- * their ordered operations haven't completed yet
5166
- * (btrfs_finish_ordered_io() not invoked yet). This means we
5167
- * didn't get their respective file extent item in the fs/subvol
5168
- * tree yet, and need to let the next fast fsync (one which
5169
- * consults the list of modified extent maps) find the em so
5170
- * that it logs a matching file extent item and waits for the
5171
- * respective ordered operation to complete (if it's still
5172
- * running).
5173
- *
5174
- * Removing every em outside the range we're logging would make
5175
- * the next fast fsync not log their matching file extent items,
5176
- * therefore making us lose data after a log replay.
5177
- */
5178
- list_for_each_entry_safe(em, n, &em_tree->modified_extents,
5179
- list) {
5180
- const u64 mod_end = em->mod_start + em->mod_len - 1;
5181
-
5182
- if (em->mod_start >= start && mod_end <= end)
5183
- list_del_init(&em->list);
5184
- }
5461
+ list_for_each_entry_safe(em, n, &em_tree->modified_extents, list)
5462
+ list_del_init(&em->list);
51855463 write_unlock(&em_tree->lock);
51865464 }
51875465
....@@ -5195,19 +5473,34 @@
51955473 }
51965474
51975475 /*
5198
- * Don't update last_log_commit if we logged that an inode exists after
5199
- * it was loaded to memory (full_sync bit set).
5200
- * This is to prevent data loss when we do a write to the inode, then
5201
- * the inode gets evicted after all delalloc was flushed, then we log
5202
- * it exists (due to a rename for example) and then fsync it. This last
5203
- * fsync would do nothing (not logging the extents previously written).
5476
+ * If we are logging that an ancestor inode exists as part of logging a
5477
+ * new name from a link or rename operation, don't mark the inode as
5478
+ * logged - otherwise if an explicit fsync is made against an ancestor,
5479
+ * the fsync considers the inode in the log and doesn't sync the log,
5480
+ * resulting in the ancestor missing after a power failure unless the
5481
+ * log was synced as part of an fsync against any other unrelated inode.
5482
+ * So keep it simple for this case and just don't flag the ancestors as
5483
+ * logged.
52045484 */
5205
- spin_lock(&inode->lock);
5206
- inode->logged_trans = trans->transid;
5207
- if (inode_only != LOG_INODE_EXISTS ||
5208
- !test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags))
5209
- inode->last_log_commit = inode->last_sub_trans;
5210
- spin_unlock(&inode->lock);
5485
+ if (!ctx ||
5486
+ !(S_ISDIR(inode->vfs_inode.i_mode) && ctx->logging_new_name &&
5487
+ &inode->vfs_inode != ctx->inode)) {
5488
+ spin_lock(&inode->lock);
5489
+ inode->logged_trans = trans->transid;
5490
+ /*
5491
+ * Don't update last_log_commit if we logged that an inode exists
5492
+ * after it was loaded to memory (full_sync bit set).
5493
+ * This is to prevent data loss when we do a write to the inode,
5494
+ * then the inode gets evicted after all delalloc was flushed,
5495
+ * then we log it exists (due to a rename for example) and then
5496
+ * fsync it. This last fsync would do nothing (not logging the
5497
+ * extents previously written).
5498
+ */
5499
+ if (inode_only != LOG_INODE_EXISTS ||
5500
+ !test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags))
5501
+ inode->last_log_commit = inode->last_sub_trans;
5502
+ spin_unlock(&inode->lock);
5503
+ }
52115504 out_unlock:
52125505 mutex_unlock(&inode->log_mutex);
52135506
....@@ -5244,7 +5537,7 @@
52445537 * Make sure any commits to the log are forced to be full
52455538 * commits.
52465539 */
5247
- btrfs_set_log_full_commit(fs_info, trans);
5540
+ btrfs_set_log_full_commit(trans);
52485541 ret = true;
52495542 }
52505543 mutex_unlock(&inode->log_mutex);
....@@ -5432,7 +5725,7 @@
54325725 continue;
54335726
54345727 btrfs_release_path(path);
5435
- di_inode = btrfs_iget(fs_info->sb, &di_key, root, NULL);
5728
+ di_inode = btrfs_iget(fs_info->sb, di_key.objectid, root);
54365729 if (IS_ERR(di_inode)) {
54375730 ret = PTR_ERR(di_inode);
54385731 goto next_dir_inode;
....@@ -5444,10 +5737,10 @@
54445737 }
54455738
54465739 ctx->log_new_dentries = false;
5447
- if (type == BTRFS_FT_DIR || type == BTRFS_FT_SYMLINK)
5740
+ if (type == BTRFS_FT_DIR)
54485741 log_mode = LOG_INODE_ALL;
54495742 ret = btrfs_log_inode(trans, root, BTRFS_I(di_inode),
5450
- log_mode, 0, LLONG_MAX, ctx);
5743
+ log_mode, ctx);
54515744 if (!ret &&
54525745 btrfs_must_commit_transaction(trans, BTRFS_I(di_inode)))
54535746 ret = 1;
....@@ -5558,8 +5851,8 @@
55585851 cur_offset = item_size;
55595852 }
55605853
5561
- dir_inode = btrfs_iget(fs_info->sb, &inode_key,
5562
- root, NULL);
5854
+ dir_inode = btrfs_iget(fs_info->sb, inode_key.objectid,
5855
+ root);
55635856 /*
55645857 * If the parent inode was deleted, return an error to
55655858 * fallback to a transaction commit. This is to prevent
....@@ -5591,7 +5884,7 @@
55915884 if (ctx)
55925885 ctx->log_new_dentries = false;
55935886 ret = btrfs_log_inode(trans, root, BTRFS_I(dir_inode),
5594
- LOG_INODE_ALL, 0, LLONG_MAX, ctx);
5887
+ LOG_INODE_ALL, ctx);
55955888 if (!ret &&
55965889 btrfs_must_commit_transaction(trans, BTRFS_I(dir_inode)))
55975890 ret = 1;
....@@ -5610,6 +5903,192 @@
56105903 return ret;
56115904 }
56125905
5906
+static int log_new_ancestors(struct btrfs_trans_handle *trans,
5907
+ struct btrfs_root *root,
5908
+ struct btrfs_path *path,
5909
+ struct btrfs_log_ctx *ctx)
5910
+{
5911
+ struct btrfs_key found_key;
5912
+
5913
+ btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
5914
+
5915
+ while (true) {
5916
+ struct btrfs_fs_info *fs_info = root->fs_info;
5917
+ const u64 last_committed = fs_info->last_trans_committed;
5918
+ struct extent_buffer *leaf = path->nodes[0];
5919
+ int slot = path->slots[0];
5920
+ struct btrfs_key search_key;
5921
+ struct inode *inode;
5922
+ u64 ino;
5923
+ int ret = 0;
5924
+
5925
+ btrfs_release_path(path);
5926
+
5927
+ ino = found_key.offset;
5928
+
5929
+ search_key.objectid = found_key.offset;
5930
+ search_key.type = BTRFS_INODE_ITEM_KEY;
5931
+ search_key.offset = 0;
5932
+ inode = btrfs_iget(fs_info->sb, ino, root);
5933
+ if (IS_ERR(inode))
5934
+ return PTR_ERR(inode);
5935
+
5936
+ if (BTRFS_I(inode)->generation > last_committed)
5937
+ ret = btrfs_log_inode(trans, root, BTRFS_I(inode),
5938
+ LOG_INODE_EXISTS, ctx);
5939
+ btrfs_add_delayed_iput(inode);
5940
+ if (ret)
5941
+ return ret;
5942
+
5943
+ if (search_key.objectid == BTRFS_FIRST_FREE_OBJECTID)
5944
+ break;
5945
+
5946
+ search_key.type = BTRFS_INODE_REF_KEY;
5947
+ ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
5948
+ if (ret < 0)
5949
+ return ret;
5950
+
5951
+ leaf = path->nodes[0];
5952
+ slot = path->slots[0];
5953
+ if (slot >= btrfs_header_nritems(leaf)) {
5954
+ ret = btrfs_next_leaf(root, path);
5955
+ if (ret < 0)
5956
+ return ret;
5957
+ else if (ret > 0)
5958
+ return -ENOENT;
5959
+ leaf = path->nodes[0];
5960
+ slot = path->slots[0];
5961
+ }
5962
+
5963
+ btrfs_item_key_to_cpu(leaf, &found_key, slot);
5964
+ if (found_key.objectid != search_key.objectid ||
5965
+ found_key.type != BTRFS_INODE_REF_KEY)
5966
+ return -ENOENT;
5967
+ }
5968
+ return 0;
5969
+}
5970
+
5971
+static int log_new_ancestors_fast(struct btrfs_trans_handle *trans,
5972
+ struct btrfs_inode *inode,
5973
+ struct dentry *parent,
5974
+ struct btrfs_log_ctx *ctx)
5975
+{
5976
+ struct btrfs_root *root = inode->root;
5977
+ struct btrfs_fs_info *fs_info = root->fs_info;
5978
+ struct dentry *old_parent = NULL;
5979
+ struct super_block *sb = inode->vfs_inode.i_sb;
5980
+ int ret = 0;
5981
+
5982
+ while (true) {
5983
+ if (!parent || d_really_is_negative(parent) ||
5984
+ sb != parent->d_sb)
5985
+ break;
5986
+
5987
+ inode = BTRFS_I(d_inode(parent));
5988
+ if (root != inode->root)
5989
+ break;
5990
+
5991
+ if (inode->generation > fs_info->last_trans_committed) {
5992
+ ret = btrfs_log_inode(trans, root, inode,
5993
+ LOG_INODE_EXISTS, ctx);
5994
+ if (ret)
5995
+ break;
5996
+ }
5997
+ if (IS_ROOT(parent))
5998
+ break;
5999
+
6000
+ parent = dget_parent(parent);
6001
+ dput(old_parent);
6002
+ old_parent = parent;
6003
+ }
6004
+ dput(old_parent);
6005
+
6006
+ return ret;
6007
+}
6008
+
6009
+static int log_all_new_ancestors(struct btrfs_trans_handle *trans,
6010
+ struct btrfs_inode *inode,
6011
+ struct dentry *parent,
6012
+ struct btrfs_log_ctx *ctx)
6013
+{
6014
+ struct btrfs_root *root = inode->root;
6015
+ const u64 ino = btrfs_ino(inode);
6016
+ struct btrfs_path *path;
6017
+ struct btrfs_key search_key;
6018
+ int ret;
6019
+
6020
+ /*
6021
+ * For a single hard link case, go through a fast path that does not
6022
+ * need to iterate the fs/subvolume tree.
6023
+ */
6024
+ if (inode->vfs_inode.i_nlink < 2)
6025
+ return log_new_ancestors_fast(trans, inode, parent, ctx);
6026
+
6027
+ path = btrfs_alloc_path();
6028
+ if (!path)
6029
+ return -ENOMEM;
6030
+
6031
+ search_key.objectid = ino;
6032
+ search_key.type = BTRFS_INODE_REF_KEY;
6033
+ search_key.offset = 0;
6034
+again:
6035
+ ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
6036
+ if (ret < 0)
6037
+ goto out;
6038
+ if (ret == 0)
6039
+ path->slots[0]++;
6040
+
6041
+ while (true) {
6042
+ struct extent_buffer *leaf = path->nodes[0];
6043
+ int slot = path->slots[0];
6044
+ struct btrfs_key found_key;
6045
+
6046
+ if (slot >= btrfs_header_nritems(leaf)) {
6047
+ ret = btrfs_next_leaf(root, path);
6048
+ if (ret < 0)
6049
+ goto out;
6050
+ else if (ret > 0)
6051
+ break;
6052
+ continue;
6053
+ }
6054
+
6055
+ btrfs_item_key_to_cpu(leaf, &found_key, slot);
6056
+ if (found_key.objectid != ino ||
6057
+ found_key.type > BTRFS_INODE_EXTREF_KEY)
6058
+ break;
6059
+
6060
+ /*
6061
+ * Don't deal with extended references because they are rare
6062
+ * cases and too complex to deal with (we would need to keep
6063
+ * track of which subitem we are processing for each item in
6064
+ * this loop, etc). So just return some error to fallback to
6065
+ * a transaction commit.
6066
+ */
6067
+ if (found_key.type == BTRFS_INODE_EXTREF_KEY) {
6068
+ ret = -EMLINK;
6069
+ goto out;
6070
+ }
6071
+
6072
+ /*
6073
+ * Logging ancestors needs to do more searches on the fs/subvol
6074
+ * tree, so it releases the path as needed to avoid deadlocks.
6075
+ * Keep track of the last inode ref key and resume from that key
6076
+ * after logging all new ancestors for the current hard link.
6077
+ */
6078
+ memcpy(&search_key, &found_key, sizeof(search_key));
6079
+
6080
+ ret = log_new_ancestors(trans, root, path, ctx);
6081
+ if (ret)
6082
+ goto out;
6083
+ btrfs_release_path(path);
6084
+ goto again;
6085
+ }
6086
+ ret = 0;
6087
+out:
6088
+ btrfs_free_path(path);
6089
+ return ret;
6090
+}
6091
+
56136092 /*
56146093 * helper function around btrfs_log_inode to make sure newly created
56156094 * parent directories also end up in the log. A minimal inode and backref
....@@ -5619,19 +6098,15 @@
56196098 static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
56206099 struct btrfs_inode *inode,
56216100 struct dentry *parent,
5622
- const loff_t start,
5623
- const loff_t end,
56246101 int inode_only,
56256102 struct btrfs_log_ctx *ctx)
56266103 {
56276104 struct btrfs_root *root = inode->root;
56286105 struct btrfs_fs_info *fs_info = root->fs_info;
56296106 struct super_block *sb;
5630
- struct dentry *old_parent = NULL;
56316107 int ret = 0;
56326108 u64 last_committed = fs_info->last_trans_committed;
56336109 bool log_dentries = false;
5634
- struct btrfs_inode *orig_inode = inode;
56356110
56366111 sb = inode->vfs_inode.i_sb;
56376112
....@@ -5665,7 +6140,8 @@
56656140 * (since logging them is pointless, a link count of 0 means they
56666141 * will never be accessible).
56676142 */
5668
- if (btrfs_inode_in_log(inode, trans->transid) ||
6143
+ if ((btrfs_inode_in_log(inode, trans->transid) &&
6144
+ list_empty(&ctx->ordered_extents)) ||
56696145 inode->vfs_inode.i_nlink == 0) {
56706146 ret = BTRFS_NO_LOG_SYNC;
56716147 goto end_no_trans;
....@@ -5675,7 +6151,7 @@
56756151 if (ret)
56766152 goto end_no_trans;
56776153
5678
- ret = btrfs_log_inode(trans, root, inode, inode_only, start, end, ctx);
6154
+ ret = btrfs_log_inode(trans, root, inode, inode_only, ctx);
56796155 if (ret)
56806156 goto end_trans;
56816157
....@@ -5737,56 +6213,22 @@
57376213 * and has a link count of 2.
57386214 */
57396215 if (inode->last_unlink_trans > last_committed) {
5740
- ret = btrfs_log_all_parents(trans, orig_inode, ctx);
6216
+ ret = btrfs_log_all_parents(trans, inode, ctx);
57416217 if (ret)
57426218 goto end_trans;
57436219 }
57446220
5745
- /*
5746
- * If a new hard link was added to the inode in the current transaction
5747
- * and its link count is now greater than 1, we need to fallback to a
5748
- * transaction commit, otherwise we can end up not logging all its new
5749
- * parents for all the hard links. Here just from the dentry used to
5750
- * fsync, we can not visit the ancestor inodes for all the other hard
5751
- * links to figure out if any is new, so we fallback to a transaction
5752
- * commit (instead of adding a lot of complexity of scanning a btree,
5753
- * since this scenario is not a common use case).
5754
- */
5755
- if (inode->vfs_inode.i_nlink > 1 &&
5756
- inode->last_link_trans > last_committed) {
5757
- ret = -EMLINK;
6221
+ ret = log_all_new_ancestors(trans, inode, parent, ctx);
6222
+ if (ret)
57586223 goto end_trans;
5759
- }
57606224
5761
- while (1) {
5762
- if (!parent || d_really_is_negative(parent) || sb != parent->d_sb)
5763
- break;
5764
-
5765
- inode = BTRFS_I(d_inode(parent));
5766
- if (root != inode->root)
5767
- break;
5768
-
5769
- if (inode->generation > last_committed) {
5770
- ret = btrfs_log_inode(trans, root, inode,
5771
- LOG_INODE_EXISTS, 0, LLONG_MAX, ctx);
5772
- if (ret)
5773
- goto end_trans;
5774
- }
5775
- if (IS_ROOT(parent))
5776
- break;
5777
-
5778
- parent = dget_parent(parent);
5779
- dput(old_parent);
5780
- old_parent = parent;
5781
- }
57826225 if (log_dentries)
5783
- ret = log_new_dir_dentries(trans, root, orig_inode, ctx);
6226
+ ret = log_new_dir_dentries(trans, root, inode, ctx);
57846227 else
57856228 ret = 0;
57866229 end_trans:
5787
- dput(old_parent);
57886230 if (ret < 0) {
5789
- btrfs_set_log_full_commit(fs_info, trans);
6231
+ btrfs_set_log_full_commit(trans);
57906232 ret = 1;
57916233 }
57926234
....@@ -5805,15 +6247,13 @@
58056247 */
58066248 int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
58076249 struct dentry *dentry,
5808
- const loff_t start,
5809
- const loff_t end,
58106250 struct btrfs_log_ctx *ctx)
58116251 {
58126252 struct dentry *parent = dget_parent(dentry);
58136253 int ret;
58146254
58156255 ret = btrfs_log_inode_parent(trans, BTRFS_I(d_inode(dentry)), parent,
5816
- start, end, LOG_INODE_ALL, ctx);
6256
+ LOG_INODE_ALL, ctx);
58176257 dput(parent);
58186258
58196259 return ret;
....@@ -5830,12 +6270,11 @@
58306270 struct btrfs_trans_handle *trans;
58316271 struct btrfs_key key;
58326272 struct btrfs_key found_key;
5833
- struct btrfs_key tmp_key;
58346273 struct btrfs_root *log;
58356274 struct btrfs_fs_info *fs_info = log_root_tree->fs_info;
58366275 struct walk_control wc = {
58376276 .process_func = process_one_buffer,
5838
- .stage = 0,
6277
+ .stage = LOG_WALK_PIN_ONLY,
58396278 };
58406279
58416280 path = btrfs_alloc_path();
....@@ -5884,7 +6323,7 @@
58846323 if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID)
58856324 break;
58866325
5887
- log = btrfs_read_fs_root(log_root_tree, &found_key);
6326
+ log = btrfs_read_tree_root(log_root_tree, &found_key);
58886327 if (IS_ERR(log)) {
58896328 ret = PTR_ERR(log);
58906329 btrfs_handle_fs_error(fs_info, ret,
....@@ -5892,11 +6331,8 @@
58926331 goto error;
58936332 }
58946333
5895
- tmp_key.objectid = found_key.offset;
5896
- tmp_key.type = BTRFS_ROOT_ITEM_KEY;
5897
- tmp_key.offset = (u64)-1;
5898
-
5899
- wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
6334
+ wc.replay_dest = btrfs_get_fs_root(fs_info, found_key.offset,
6335
+ true);
59006336 if (IS_ERR(wc.replay_dest)) {
59016337 ret = PTR_ERR(wc.replay_dest);
59026338
....@@ -5912,12 +6348,10 @@
59126348 * each subsequent pass.
59136349 */
59146350 if (ret == -ENOENT)
5915
- ret = btrfs_pin_extent_for_log_replay(fs_info,
6351
+ ret = btrfs_pin_extent_for_log_replay(trans,
59166352 log->node->start,
59176353 log->node->len);
5918
- free_extent_buffer(log->node);
5919
- free_extent_buffer(log->commit_root);
5920
- kfree(log);
6354
+ btrfs_put_root(log);
59216355
59226356 if (!ret)
59236357 goto next;
....@@ -5953,9 +6387,8 @@
59536387 }
59546388
59556389 wc.replay_dest->log_root = NULL;
5956
- free_extent_buffer(log->node);
5957
- free_extent_buffer(log->commit_root);
5958
- kfree(log);
6390
+ btrfs_put_root(wc.replay_dest);
6391
+ btrfs_put_root(log);
59596392
59606393 if (ret)
59616394 goto error;
....@@ -5986,10 +6419,9 @@
59866419 if (ret)
59876420 return ret;
59886421
5989
- free_extent_buffer(log_root_tree->node);
59906422 log_root_tree->log_root = NULL;
59916423 clear_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags);
5992
- kfree(log_root_tree);
6424
+ btrfs_put_root(log_root_tree);
59936425
59946426 return 0;
59956427 error:
....@@ -6085,26 +6517,12 @@
60856517 /*
60866518 * Call this after adding a new name for a file and it will properly
60876519 * update the log to reflect the new name.
6088
- *
6089
- * @ctx can not be NULL when @sync_log is false, and should be NULL when it's
6090
- * true (because it's not used).
6091
- *
6092
- * Return value depends on whether @sync_log is true or false.
6093
- * When true: returns BTRFS_NEED_TRANS_COMMIT if the transaction needs to be
6094
- * committed by the caller, and BTRFS_DONT_NEED_TRANS_COMMIT
6095
- * otherwise.
6096
- * When false: returns BTRFS_DONT_NEED_LOG_SYNC if the caller does not need to
6097
- * to sync the log, BTRFS_NEED_LOG_SYNC if it needs to sync the log,
6098
- * or BTRFS_NEED_TRANS_COMMIT if the transaction needs to be
6099
- * committed (without attempting to sync the log).
61006520 */
6101
-int btrfs_log_new_name(struct btrfs_trans_handle *trans,
6521
+void btrfs_log_new_name(struct btrfs_trans_handle *trans,
61026522 struct btrfs_inode *inode, struct btrfs_inode *old_dir,
6103
- struct dentry *parent,
6104
- bool sync_log, struct btrfs_log_ctx *ctx)
6523
+ struct dentry *parent)
61056524 {
6106
- struct btrfs_fs_info *fs_info = trans->fs_info;
6107
- int ret;
6525
+ struct btrfs_log_ctx ctx;
61086526
61096527 /*
61106528 * this will force the logging code to walk the dentry chain
....@@ -6117,36 +6535,19 @@
61176535 * if this inode hasn't been logged and directory we're renaming it
61186536 * from hasn't been logged, we don't need to log it
61196537 */
6120
- if (inode->logged_trans <= fs_info->last_trans_committed &&
6121
- (!old_dir || old_dir->logged_trans <= fs_info->last_trans_committed))
6122
- return sync_log ? BTRFS_DONT_NEED_TRANS_COMMIT :
6123
- BTRFS_DONT_NEED_LOG_SYNC;
6538
+ if (!inode_logged(trans, inode) &&
6539
+ (!old_dir || !inode_logged(trans, old_dir)))
6540
+ return;
61246541
6125
- if (sync_log) {
6126
- struct btrfs_log_ctx ctx2;
6127
-
6128
- btrfs_init_log_ctx(&ctx2, &inode->vfs_inode);
6129
- ret = btrfs_log_inode_parent(trans, inode, parent, 0, LLONG_MAX,
6130
- LOG_INODE_EXISTS, &ctx2);
6131
- if (ret == BTRFS_NO_LOG_SYNC)
6132
- return BTRFS_DONT_NEED_TRANS_COMMIT;
6133
- else if (ret)
6134
- return BTRFS_NEED_TRANS_COMMIT;
6135
-
6136
- ret = btrfs_sync_log(trans, inode->root, &ctx2);
6137
- if (ret)
6138
- return BTRFS_NEED_TRANS_COMMIT;
6139
- return BTRFS_DONT_NEED_TRANS_COMMIT;
6140
- }
6141
-
6142
- ASSERT(ctx);
6143
- ret = btrfs_log_inode_parent(trans, inode, parent, 0, LLONG_MAX,
6144
- LOG_INODE_EXISTS, ctx);
6145
- if (ret == BTRFS_NO_LOG_SYNC)
6146
- return BTRFS_DONT_NEED_LOG_SYNC;
6147
- else if (ret)
6148
- return BTRFS_NEED_TRANS_COMMIT;
6149
-
6150
- return BTRFS_NEED_LOG_SYNC;
6542
+ btrfs_init_log_ctx(&ctx, &inode->vfs_inode);
6543
+ ctx.logging_new_name = true;
6544
+ /*
6545
+ * We don't care about the return value. If we fail to log the new name
6546
+ * then we know the next attempt to sync the log will fallback to a full
6547
+ * transaction commit (due to a call to btrfs_set_log_full_commit()), so
6548
+ * we don't need to worry about getting a log committed that has an
6549
+ * inconsistent state after a rename operation.
6550
+ */
6551
+ btrfs_log_inode_parent(trans, inode, parent, LOG_INODE_EXISTS, &ctx);
61516552 }
61526553