2024-05-16 8d2a02b24d66aa359e83eebc1ed3c0f85367a1cb
kernel/fs/btrfs/raid56.c
@@ -35,6 +35,22 @@
 
 #define RBIO_CACHE_SIZE 1024
 
+#define BTRFS_STRIPE_HASH_TABLE_BITS 11
+
+/* Used by the raid56 code to lock stripes for read/modify/write */
+struct btrfs_stripe_hash {
+	struct list_head hash_list;
+	spinlock_t lock;
+};
+
+/* Used by the raid56 code to lock stripes for read/modify/write */
+struct btrfs_stripe_hash_table {
+	struct list_head stripe_cache;
+	spinlock_t cache_lock;
+	int cache_size;
+	struct btrfs_stripe_hash table[];
+};
+
 enum btrfs_rbio_ops {
 	BTRFS_RBIO_WRITE,
 	BTRFS_RBIO_READ_REBUILD,
@@ -174,7 +190,7 @@
 
 static void start_async_work(struct btrfs_raid_bio *rbio, btrfs_func_t work_func)
 {
-	btrfs_init_work(&rbio->work, btrfs_rmw_helper, work_func, NULL, NULL);
+	btrfs_init_work(&rbio->work, work_func, NULL, NULL);
 	btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
 }
 
@@ -190,7 +206,6 @@
 	struct btrfs_stripe_hash *h;
 	int num_entries = 1 << BTRFS_STRIPE_HASH_TABLE_BITS;
 	int i;
-	int table_size;
 
 	if (info->stripe_hash_table)
 		return 0;
@@ -202,8 +217,7 @@
 	 * Try harder to allocate and fallback to vmalloc to lower the chance
 	 * of a failing mount.
 	 */
-	table_size = sizeof(*table) + sizeof(*h) * num_entries;
-	table = kvzalloc(table_size, GFP_KERNEL);
+	table = kvzalloc(struct_size(table, table, num_entries), GFP_KERNEL);
 	if (!table)
 		return -ENOMEM;
 
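A minimal userspace sketch of the allocation change above (not kernel code): for a struct that ends in a flexible array member, struct_size(table, table, num_entries) yields the same size as the removed open-coded sizeof arithmetic, with overflow checking added by the kernel's <linux/overflow.h>. The struct_size_sketch macro below is a simplified stand-in without the saturation semantics, and the struct layouts are illustrative only.

#include <assert.h>
#include <stddef.h>
#include <stdlib.h>

#define BTRFS_STRIPE_HASH_TABLE_BITS 11

struct stripe_hash {
	int lock;			/* stands in for spinlock_t */
};

struct stripe_hash_table {
	int cache_size;
	struct stripe_hash table[];	/* flexible array member */
};

/* Simplified stand-in for the kernel's struct_size(); no overflow saturation. */
#define struct_size_sketch(p, member, n) \
	(sizeof(*(p)) + sizeof((p)->member[0]) * (size_t)(n))

int main(void)
{
	size_t num_entries = 1 << BTRFS_STRIPE_HASH_TABLE_BITS;
	struct stripe_hash_table *table = NULL;

	/* Same size as the removed "sizeof(*table) + sizeof(*h) * num_entries". */
	assert(struct_size_sketch(table, table, num_entries) ==
	       sizeof(*table) + sizeof(struct stripe_hash) * num_entries);

	/* Zeroed allocation, mirroring kvzalloc() in the patch. */
	table = calloc(1, struct_size_sketch(table, table, num_entries));
	if (!table)
		return 1;
	free(table);
	return 0;
}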
@@ -318,6 +332,9 @@
 {
 	bio_list_merge(&dest->bio_list, &victim->bio_list);
 	dest->bio_list_bytes += victim->bio_list_bytes;
+	/* Also inherit the bitmaps from @victim. */
+	bitmap_or(dest->dbitmap, victim->dbitmap, dest->dbitmap,
+		  dest->stripe_npages);
 	dest->generic_bio_cnt += victim->generic_bio_cnt;
 	bio_list_init(&victim->bio_list);
 }
@@ -655,8 +672,7 @@
 	 */
 static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
 {
-	int bucket = rbio_bucket(rbio);
-	struct btrfs_stripe_hash *h = rbio->fs_info->stripe_hash_table->table + bucket;
+	struct btrfs_stripe_hash *h;
 	struct btrfs_raid_bio *cur;
 	struct btrfs_raid_bio *pending;
 	unsigned long flags;
@@ -664,64 +680,63 @@
 	struct btrfs_raid_bio *cache_drop = NULL;
 	int ret = 0;
 
+	h = rbio->fs_info->stripe_hash_table->table + rbio_bucket(rbio);
+
 	spin_lock_irqsave(&h->lock, flags);
 	list_for_each_entry(cur, &h->hash_list, hash_list) {
-		if (cur->bbio->raid_map[0] == rbio->bbio->raid_map[0]) {
-			spin_lock(&cur->bio_list_lock);
+		if (cur->bbio->raid_map[0] != rbio->bbio->raid_map[0])
+			continue;
 
-			/* can we steal this cached rbio's pages? */
-			if (bio_list_empty(&cur->bio_list) &&
-			    list_empty(&cur->plug_list) &&
-			    test_bit(RBIO_CACHE_BIT, &cur->flags) &&
-			    !test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) {
-				list_del_init(&cur->hash_list);
-				refcount_dec(&cur->refs);
+		spin_lock(&cur->bio_list_lock);
 
-				steal_rbio(cur, rbio);
-				cache_drop = cur;
-				spin_unlock(&cur->bio_list_lock);
+		/* Can we steal this cached rbio's pages? */
+		if (bio_list_empty(&cur->bio_list) &&
+		    list_empty(&cur->plug_list) &&
+		    test_bit(RBIO_CACHE_BIT, &cur->flags) &&
+		    !test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) {
+			list_del_init(&cur->hash_list);
+			refcount_dec(&cur->refs);
 
-				goto lockit;
-			}
+			steal_rbio(cur, rbio);
+			cache_drop = cur;
+			spin_unlock(&cur->bio_list_lock);
 
-			/* can we merge into the lock owner? */
-			if (rbio_can_merge(cur, rbio)) {
-				merge_rbio(cur, rbio);
+			goto lockit;
+		}
+
+		/* Can we merge into the lock owner? */
+		if (rbio_can_merge(cur, rbio)) {
+			merge_rbio(cur, rbio);
+			spin_unlock(&cur->bio_list_lock);
+			freeit = rbio;
+			ret = 1;
+			goto out;
+		}
+
+
+		/*
+		 * We couldn't merge with the running rbio, see if we can merge
+		 * with the pending ones. We don't have to check for rmw_locked
+		 * because there is no way they are inside finish_rmw right now
+		 */
+		list_for_each_entry(pending, &cur->plug_list, plug_list) {
+			if (rbio_can_merge(pending, rbio)) {
+				merge_rbio(pending, rbio);
 				spin_unlock(&cur->bio_list_lock);
 				freeit = rbio;
 				ret = 1;
 				goto out;
 			}
-
-
-			/*
-			 * we couldn't merge with the running
-			 * rbio, see if we can merge with the
-			 * pending ones. We don't have to
-			 * check for rmw_locked because there
-			 * is no way they are inside finish_rmw
-			 * right now
-			 */
-			list_for_each_entry(pending, &cur->plug_list,
-					    plug_list) {
-				if (rbio_can_merge(pending, rbio)) {
-					merge_rbio(pending, rbio);
-					spin_unlock(&cur->bio_list_lock);
-					freeit = rbio;
-					ret = 1;
-					goto out;
-				}
-			}
-
-			/* no merging, put us on the tail of the plug list,
-			 * our rbio will be started with the currently
-			 * running rbio unlocks
-			 */
-			list_add_tail(&rbio->plug_list, &cur->plug_list);
-			spin_unlock(&cur->bio_list_lock);
-			ret = 1;
-			goto out;
 		}
+
+		/*
+		 * No merging, put us on the tail of the plug list, our rbio
+		 * will be started with the currently running rbio unlocks
+		 */
+		list_add_tail(&rbio->plug_list, &cur->plug_list);
+		spin_unlock(&cur->bio_list_lock);
+		ret = 1;
+		goto out;
 	}
 lockit:
 	refcount_inc(&rbio->refs);
@@ -862,6 +877,12 @@
 
 	if (rbio->generic_bio_cnt)
 		btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt);
+	/*
+	 * Clear the data bitmap, as the rbio may be cached for later usage.
+	 * Do this before unlock_stripe() so there will be no new bio
+	 * for this bio.
+	 */
+	bitmap_clear(rbio->dbitmap, 0, rbio->stripe_npages);
 
 	/*
 	 * At this moment, rbio->bio_list is empty, however since rbio does not
@@ -1071,7 +1092,6 @@
 			    unsigned long bio_max_len)
 {
 	struct bio *last = bio_list->tail;
-	u64 last_end = 0;
 	int ret;
 	struct bio *bio;
 	struct btrfs_bio_stripe *stripe;
@@ -1086,15 +1106,14 @@
 
 	/* see if we can add this page onto our existing bio */
 	if (last) {
-		last_end = (u64)last->bi_iter.bi_sector << 9;
+		u64 last_end = (u64)last->bi_iter.bi_sector << 9;
 		last_end += last->bi_iter.bi_size;
 
 		/*
 		 * we can't merge these if they are from different
 		 * devices or if they are not contiguous
 		 */
-		if (last_end == disk_start && stripe->dev->bdev &&
-		    !last->bi_status &&
+		if (last_end == disk_start && !last->bi_status &&
 		    last->bi_disk == stripe->dev->bdev->bd_disk &&
 		    last->bi_partno == stripe->dev->bdev->bd_partno) {
 			ret = bio_add_page(last, page, PAGE_SIZE, 0);
@@ -1105,6 +1124,7 @@
 
 	/* put a new bio on the list */
 	bio = btrfs_io_bio_alloc(bio_max_len >> PAGE_SHIFT ?: 1);
+	btrfs_io_bio(bio)->device = stripe->dev;
 	bio->bi_iter.bi_size = 0;
 	bio_set_dev(bio, stripe->dev->bdev);
 	bio->bi_iter.bi_sector = disk_start >> 9;
@@ -1196,6 +1216,9 @@
 	else
 		BUG();
 
+	/* We should have at least one data sector. */
+	ASSERT(bitmap_weight(rbio->dbitmap, rbio->stripe_npages));
+
 	/* at this point we either have a full stripe,
 	 * or we've read the full stripe from the drive.
 	 * recalculate the parity and write the new results.
@@ -1269,6 +1292,11 @@
 	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
 		for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
 			struct page *page;
+
+			/* This vertical stripe has no data, skip it. */
+			if (!test_bit(pagenr, rbio->dbitmap))
+				continue;
+
 			if (stripe < rbio->nr_data) {
 				page = page_in_rbio(rbio, stripe, pagenr, 1);
 				if (!page)
@@ -1293,6 +1321,11 @@
 
 		for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
 			struct page *page;
+
+			/* This vertical stripe has no data, skip it. */
+			if (!test_bit(pagenr, rbio->dbitmap))
+				continue;
+
 			if (stripe < rbio->nr_data) {
 				page = page_in_rbio(rbio, stripe, pagenr, 1);
 				if (!page)
@@ -1313,11 +1346,7 @@
 	atomic_set(&rbio->stripes_pending, bio_list_size(&bio_list));
 	BUG_ON(atomic_read(&rbio->stripes_pending) == 0);
 
-	while (1) {
-		bio = bio_list_pop(&bio_list);
-		if (!bio)
-			break;
-
+	while ((bio = bio_list_pop(&bio_list))) {
 		bio->bi_private = rbio;
 		bio->bi_end_io = raid_write_end_io;
 		bio->bi_opf = REQ_OP_WRITE;
@@ -1342,7 +1371,6 @@
 			   struct bio *bio)
 {
 	u64 physical = bio->bi_iter.bi_sector;
-	u64 stripe_start;
 	int i;
 	struct btrfs_bio_stripe *stripe;
 
@@ -1350,9 +1378,7 @@
 
 	for (i = 0; i < rbio->bbio->num_stripes; i++) {
 		stripe = &rbio->bbio->stripes[i];
-		stripe_start = stripe->physical;
-		if (physical >= stripe_start &&
-		    physical < stripe_start + rbio->stripe_len &&
+		if (in_range(physical, stripe->physical, rbio->stripe_len) &&
 		    stripe->dev->bdev &&
 		    bio->bi_disk == stripe->dev->bdev->bd_disk &&
 		    bio->bi_partno == stripe->dev->bdev->bd_partno) {
@@ -1370,18 +1396,14 @@
 static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
 				   struct bio *bio)
 {
-	u64 logical = bio->bi_iter.bi_sector;
-	u64 stripe_start;
+	u64 logical = (u64)bio->bi_iter.bi_sector << 9;
 	int i;
 
-	logical <<= 9;
-
 	for (i = 0; i < rbio->nr_data; i++) {
-		stripe_start = rbio->bbio->raid_map[i];
-		if (logical >= stripe_start &&
-		    logical < stripe_start + rbio->stripe_len) {
+		u64 stripe_start = rbio->bbio->raid_map[i];
+
+		if (in_range(logical, stripe_start, rbio->stripe_len))
 			return i;
-		}
 	}
 	return -1;
 }
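Both find_bio_stripe() and find_logical_bio_stripe() now express the open-coded pair of comparisons through in_range(). Below is a small userspace sketch of the half-open interval semantics; the macro is an illustrative stand-in that mirrors what the helper checks, not the kernel definition itself, and the stripe geometry values are made up.

#include <assert.h>
#include <stdint.h>

/* Illustrative stand-in: "val >= start && val < start + len". */
#define in_range(val, start, len) ((val) >= (start) && (val) < (start) + (len))

int main(void)
{
	uint64_t stripe_start = 1ULL << 20;	/* example raid_map[i] */
	uint64_t stripe_len = 64ULL * 1024;	/* example stripe_len */

	assert(in_range(stripe_start, stripe_start, stripe_len));
	assert(in_range(stripe_start + stripe_len - 1, stripe_start, stripe_len));
	assert(!in_range(stripe_start + stripe_len, stripe_start, stripe_len));
	assert(!in_range(stripe_start - 1, stripe_start, stripe_len));
	return 0;
}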
@@ -1439,11 +1461,11 @@
 static void set_bio_pages_uptodate(struct bio *bio)
 {
 	struct bio_vec *bvec;
-	int i;
+	struct bvec_iter_all iter_all;
 
 	ASSERT(!bio_flagged(bio, BIO_CLONED));
 
-	bio_for_each_segment_all(bvec, bio, i)
+	bio_for_each_segment_all(bvec, bio, iter_all)
 		SetPageUptodate(bvec->bv_page);
 }
 
@@ -1555,11 +1577,7 @@
 	 * not to touch it after that
 	 */
 	atomic_set(&rbio->stripes_pending, bios_to_read);
-	while (1) {
-		bio = bio_list_pop(&bio_list);
-		if (!bio)
-			break;
-
+	while ((bio = bio_list_pop(&bio_list))) {
 		bio->bi_private = rbio;
 		bio->bi_end_io = raid_rmw_end_io;
 		bio->bi_opf = REQ_OP_READ;
@@ -1724,13 +1742,39 @@
 	plug = container_of(cb, struct btrfs_plug_cb, cb);
 
 	if (from_schedule) {
-		btrfs_init_work(&plug->work, btrfs_rmw_helper,
-				unplug_work, NULL, NULL);
+		btrfs_init_work(&plug->work, unplug_work, NULL, NULL);
 		btrfs_queue_work(plug->info->rmw_workers,
 				 &plug->work);
 		return;
 	}
 	run_plug(plug);
+}
+
+/* Add the original bio into rbio->bio_list, and update rbio::dbitmap. */
+static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio)
+{
+	const struct btrfs_fs_info *fs_info = rbio->fs_info;
+	const u64 orig_logical = orig_bio->bi_iter.bi_sector << SECTOR_SHIFT;
+	const u64 full_stripe_start = rbio->bbio->raid_map[0];
+	const u32 orig_len = orig_bio->bi_iter.bi_size;
+	const u32 sectorsize = fs_info->sectorsize;
+	u64 cur_logical;
+
+	ASSERT(orig_logical >= full_stripe_start &&
+	       orig_logical + orig_len <= full_stripe_start +
+	       rbio->nr_data * rbio->stripe_len);
+
+	bio_list_add(&rbio->bio_list, orig_bio);
+	rbio->bio_list_bytes += orig_bio->bi_iter.bi_size;
+
+	/* Update the dbitmap. */
+	for (cur_logical = orig_logical; cur_logical < orig_logical + orig_len;
+	     cur_logical += sectorsize) {
+		int bit = ((u32)(cur_logical - full_stripe_start) >>
+			   PAGE_SHIFT) % rbio->stripe_npages;
+
+		set_bit(bit, rbio->dbitmap);
+	}
 }
 
 /*
@@ -1749,9 +1793,8 @@
 		btrfs_put_bbio(bbio);
 		return PTR_ERR(rbio);
 	}
-	bio_list_add(&rbio->bio_list, bio);
-	rbio->bio_list_bytes = bio->bi_iter.bi_size;
 	rbio->operation = BTRFS_RBIO_WRITE;
+	rbio_add_bio(rbio, bio);
 
 	btrfs_bio_counter_inc_noblocked(fs_info);
 	rbio->generic_bio_cnt = 1;
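The write setup above (and the read-rebuild setup further down) now funnels through the new rbio_add_bio(), which besides queuing the bio records which page-sized slots of the full stripe actually carry data. A userspace sketch of that dbitmap index arithmetic follows, with made-up stripe geometry and addresses purely for illustration.

#include <stdint.h>
#include <stdio.h>

/*
 * Sketch of the dbitmap update in rbio_add_bio(): each sector of the
 * original bio maps to a page-sized slot inside the full stripe, modulo
 * the number of pages per stripe.  All values below are illustrative.
 */
#define PAGE_SHIFT	12
#define SECTORSIZE	4096u

int main(void)
{
	uint64_t full_stripe_start = 1ULL << 30;	/* example raid_map[0] */
	uint32_t stripe_npages = 16;			/* e.g. 64K stripe / 4K pages */
	uint64_t orig_logical = full_stripe_start + 5 * SECTORSIZE;
	uint32_t orig_len = 3 * SECTORSIZE;
	uint64_t cur;

	for (cur = orig_logical; cur < orig_logical + orig_len; cur += SECTORSIZE) {
		int bit = (int)(((uint32_t)(cur - full_stripe_start) >> PAGE_SHIFT) %
				stripe_npages);

		printf("logical %llu -> dbitmap bit %d\n",
		       (unsigned long long)cur, bit);
	}
	return 0;
}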
@@ -1867,11 +1910,8 @@
 	}
 
 	/* make sure our ps and qs are in order */
-	if (faila > failb) {
-		int tmp = failb;
-		failb = faila;
-		faila = tmp;
-	}
+	if (faila > failb)
+		swap(faila, failb);
 
 	/* if the q stripe is failed, do a pstripe reconstruction
 	 * from the xors.
@@ -1977,7 +2017,7 @@
 	 * - In case of single failure, where rbio->failb == -1:
 	 *
 	 *   Cache this rbio iff the above read reconstruction is
-	 *   excuted without problems.
+	 *   executed without problems.
 	 */
 	if (err == BLK_STS_OK && rbio->failb < 0)
 		cache_rbio_pages(rbio);
@@ -2053,9 +2093,12 @@
 	atomic_set(&rbio->error, 0);
 
 	/*
-	 * read everything that hasn't failed. Thanks to the
-	 * stripe cache, it is possible that some or all of these
-	 * pages are going to be uptodate.
+	 * Read everything that hasn't failed. However this time we will
+	 * not trust any cached sector.
+	 * The cache may hold stale data for the parts that the higher
+	 * layer is not reading.
+	 *
+	 * So here we always re-read everything in the recovery path.
 	 */
 	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
 		if (rbio->faila == stripe || rbio->failb == stripe) {
@@ -2064,16 +2107,6 @@
 		}
 
 		for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
-			struct page *p;
-
-			/*
-			 * the rmw code may have already read this
-			 * page in
-			 */
-			p = rbio_stripe_page(rbio, stripe, pagenr);
-			if (PageUptodate(p))
-				continue;
-
 			ret = rbio_add_io_page(rbio, &bio_list,
 					       rbio_stripe_page(rbio, stripe, pagenr),
 					       stripe, pagenr, rbio->stripe_len);
@@ -2091,7 +2124,7 @@
 	 */
 	if (atomic_read(&rbio->error) <= rbio->bbio->max_errors) {
 		__raid_recover_end_io(rbio);
-		goto out;
+		return 0;
 	} else {
 		goto cleanup;
 	}
@@ -2102,11 +2135,7 @@
 	 * not to touch it after that
 	 */
 	atomic_set(&rbio->stripes_pending, bios_to_read);
-	while (1) {
-		bio = bio_list_pop(&bio_list);
-		if (!bio)
-			break;
-
+	while ((bio = bio_list_pop(&bio_list))) {
 		bio->bi_private = rbio;
 		bio->bi_end_io = raid_recover_end_io;
 		bio->bi_opf = REQ_OP_READ;
@@ -2115,7 +2144,7 @@
 
 		submit_bio(bio);
 	}
-out:
+
 	return 0;
 
 cleanup:
@@ -2155,8 +2184,7 @@
 	}
 
 	rbio->operation = BTRFS_RBIO_READ_REBUILD;
-	bio_list_add(&rbio->bio_list, bio);
-	rbio->bio_list_bytes = bio->bi_iter.bi_size;
+	rbio_add_bio(rbio, bio);
 
 	rbio->faila = find_logical_bio_stripe(rbio, bio);
 	if (rbio->faila == -1) {
@@ -2470,11 +2498,7 @@
 
 	atomic_set(&rbio->stripes_pending, nr_data);
 
-	while (1) {
-		bio = bio_list_pop(&bio_list);
-		if (!bio)
-			break;
-
+	while ((bio = bio_list_pop(&bio_list))) {
 		bio->bi_private = rbio;
 		bio->bi_end_io = raid_write_end_io;
 		bio->bi_opf = REQ_OP_WRITE;
@@ -2652,11 +2676,7 @@
 	 * not to touch it after that
 	 */
 	atomic_set(&rbio->stripes_pending, bios_to_read);
-	while (1) {
-		bio = bio_list_pop(&bio_list);
-		if (!bio)
-			break;
-
+	while ((bio = bio_list_pop(&bio_list))) {
 		bio->bi_private = rbio;
 		bio->bi_end_io = raid56_parity_scrub_end_io;
 		bio->bi_opf = REQ_OP_READ;