2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/block/blk-flush.c
@@ -1,10 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Functions to sequence PREFLUSH and FUA writes.
  *
  * Copyright (C) 2011		Max Planck Institute for Gravitational Physics
  * Copyright (C) 2011		Tejun Heo <tj@kernel.org>
- *
- * This file is released under the GPLv2.
  *
  * REQ_{PREFLUSH|FUA} requests are decomposed to sequences consisted of three
  * optional steps - PREFLUSH, DATA and POSTFLUSH - according to the request
@@ -70,6 +69,7 @@
 #include <linux/blkdev.h>
 #include <linux/gfp.h>
 #include <linux/blk-mq.h>
+#include <linux/lockdep.h>
 
 #include "blk.h"
 #include "blk-mq.h"
@@ -93,7 +93,7 @@
 	FLUSH_PENDING_TIMEOUT	= 5 * HZ,
 };
 
-static bool blk_kick_flush(struct request_queue *q,
+static void blk_kick_flush(struct request_queue *q,
 			   struct blk_flush_queue *fq, unsigned int flags);
 
 static unsigned int blk_flush_policy(unsigned long fflags, struct request *rq)
@@ -132,18 +132,20 @@
 	rq->end_io = rq->flush.saved_end_io;
 }
 
-static bool blk_flush_queue_rq(struct request *rq, bool add_front)
+static void blk_flush_queue_rq(struct request *rq, bool add_front)
 {
-	if (rq->q->mq_ops) {
-		blk_mq_add_to_requeue_list(rq, add_front, true);
-		return false;
-	} else {
-		if (add_front)
-			list_add(&rq->queuelist, &rq->q->queue_head);
-		else
-			list_add_tail(&rq->queuelist, &rq->q->queue_head);
-		return true;
-	}
+	blk_mq_add_to_requeue_list(rq, add_front, true);
+}
+
+static void blk_account_io_flush(struct request *rq)
+{
+	struct hd_struct *part = &rq->rq_disk->part0;
+
+	part_stat_lock();
+	part_stat_inc(part, ios[STAT_FLUSH]);
+	part_stat_add(part, nsecs[STAT_FLUSH],
+		      ktime_get_ns() - rq->start_time_ns);
+	part_stat_unlock();
 }
 
 /**
@@ -157,18 +159,14 @@
  * completion and trigger the next step.
  *
  * CONTEXT:
- * spin_lock_irq(q->queue_lock or fq->mq_flush_lock)
- *
- * RETURNS:
- * %true if requests were added to the dispatch queue, %false otherwise.
+ * spin_lock_irq(fq->mq_flush_lock)
  */
-static bool blk_flush_complete_seq(struct request *rq,
+static void blk_flush_complete_seq(struct request *rq,
 				   struct blk_flush_queue *fq,
 				   unsigned int seq, blk_status_t error)
 {
 	struct request_queue *q = rq->q;
 	struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
-	bool queued = false, kicked;
 	unsigned int cmd_flags;
 
 	BUG_ON(rq->flush.seq & seq);
@@ -191,12 +189,12 @@
 
 	case REQ_FSEQ_DATA:
 		list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
-		queued = blk_flush_queue_rq(rq, true);
+		blk_flush_queue_rq(rq, true);
 		break;
 
 	case REQ_FSEQ_DONE:
 		/*
-		 * @rq was previously adjusted by blk_flush_issue() for
+		 * @rq was previously adjusted by blk_insert_flush() for
 		 * flush sequencing and may already have gone through the
 		 * flush data request completion path. Restore @rq for
 		 * normal completion and end it.
@@ -204,52 +202,50 @@
 		BUG_ON(!list_empty(&rq->queuelist));
 		list_del_init(&rq->flush.list);
 		blk_flush_restore_request(rq);
-		if (q->mq_ops)
-			blk_mq_end_request(rq, error);
-		else
-			__blk_end_request_all(rq, error);
+		blk_mq_end_request(rq, error);
 		break;
 
 	default:
 		BUG();
 	}
 
-	kicked = blk_kick_flush(q, fq, cmd_flags);
-	return kicked | queued;
+	blk_kick_flush(q, fq, cmd_flags);
 }
 
 static void flush_end_io(struct request *flush_rq, blk_status_t error)
 {
 	struct request_queue *q = flush_rq->q;
 	struct list_head *running;
-	bool queued = false;
 	struct request *rq, *n;
 	unsigned long flags = 0;
 	struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx);
 
-	if (q->mq_ops) {
-		struct blk_mq_hw_ctx *hctx;
+	/* release the tag's ownership to the req cloned from */
+	spin_lock_irqsave(&fq->mq_flush_lock, flags);
 
-		/* release the tag's ownership to the req cloned from */
-		spin_lock_irqsave(&fq->mq_flush_lock, flags);
+	if (!refcount_dec_and_test(&flush_rq->ref)) {
+		fq->rq_status = error;
+		spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
+		return;
+	}
 
-		if (!refcount_dec_and_test(&flush_rq->ref)) {
-			fq->rq_status = error;
-			spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
-			return;
-		}
+	blk_account_io_flush(flush_rq);
+	/*
+	 * Flush request has to be marked as IDLE when it is really ended
+	 * because its .end_io() is called from timeout code path too for
+	 * avoiding use-after-free.
+	 */
+	WRITE_ONCE(flush_rq->state, MQ_RQ_IDLE);
+	if (fq->rq_status != BLK_STS_OK) {
+		error = fq->rq_status;
+		fq->rq_status = BLK_STS_OK;
+	}
 
-		if (fq->rq_status != BLK_STS_OK)
-			error = fq->rq_status;
-
-		hctx = blk_mq_map_queue(q, flush_rq->mq_ctx->cpu);
-		if (!q->elevator) {
-			blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
-			flush_rq->tag = -1;
-		} else {
-			blk_mq_put_driver_tag_hctx(hctx, flush_rq);
-			flush_rq->internal_tag = -1;
-		}
+	if (!q->elevator) {
+		flush_rq->tag = BLK_MQ_NO_TAG;
+	} else {
+		blk_mq_put_driver_tag(flush_rq);
+		flush_rq->internal_tag = BLK_MQ_NO_TAG;
 	}
 
 	running = &fq->flush_queue[fq->flush_running_idx];
@@ -258,35 +254,20 @@
 	/* account completion of the flush request */
 	fq->flush_running_idx ^= 1;
 
-	if (!q->mq_ops)
-		elv_completed_request(q, flush_rq);
-
 	/* and push the waiting requests to the next stage */
 	list_for_each_entry_safe(rq, n, running, flush.list) {
 		unsigned int seq = blk_flush_cur_seq(rq);
 
 		BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH);
-		queued |= blk_flush_complete_seq(rq, fq, seq, error);
+		blk_flush_complete_seq(rq, fq, seq, error);
 	}
 
-	/*
-	 * Kick the queue to avoid stall for two cases:
-	 * 1. Moving a request silently to empty queue_head may stall the
-	 * queue.
-	 * 2. When flush request is running in non-queueable queue, the
-	 * queue is hold. Restart the queue after flush request is finished
-	 * to avoid stall.
-	 * This function is called from request completion path and calling
-	 * directly into request_fn may confuse the driver. Always use
-	 * kblockd.
-	 */
-	if (queued || fq->flush_queue_delayed) {
-		WARN_ON(q->mq_ops);
-		blk_run_queue_async(q);
-	}
-	fq->flush_queue_delayed = 0;
-	if (q->mq_ops)
-		spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
+	spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
+}
+
+bool is_flush_rq(struct request *rq)
+{
+	return rq->end_io == flush_end_io;
 }
 
 /**
@@ -299,12 +280,10 @@
  * Please read the comment at the top of this file for more info.
  *
  * CONTEXT:
- * spin_lock_irq(q->queue_lock or fq->mq_flush_lock)
+ * spin_lock_irq(fq->mq_flush_lock)
  *
- * RETURNS:
- * %true if flush was issued, %false otherwise.
  */
-static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
+static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
 			   unsigned int flags)
 {
 	struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
@@ -314,19 +293,13 @@
 
 	/* C1 described at the top of this file */
 	if (fq->flush_pending_idx != fq->flush_running_idx || list_empty(pending))
-		return false;
+		return;
 
-	/* C2 and C3
-	 *
-	 * For blk-mq + scheduling, we can risk having all driver tags
-	 * assigned to empty flushes, and we deadlock if we are expecting
-	 * other requests to make progress. Don't defer for that case.
-	 */
+	/* C2 and C3 */
 	if (!list_empty(&fq->flush_data_in_flight) &&
-	    !(q->mq_ops && q->elevator) &&
 	    time_before(jiffies,
 			fq->flush_pending_since + FLUSH_PENDING_TIMEOUT))
-		return false;
+		return;
 
 	/*
 	 * Issue flush and toggle pending_idx. This makes pending_idx
@@ -344,86 +317,49 @@
 	 * In case of IO scheduler, flush rq need to borrow scheduler tag
 	 * just for cheating put/get driver tag.
 	 */
-	if (q->mq_ops) {
-		struct blk_mq_hw_ctx *hctx;
+	flush_rq->mq_ctx = first_rq->mq_ctx;
+	flush_rq->mq_hctx = first_rq->mq_hctx;
 
-		flush_rq->mq_ctx = first_rq->mq_ctx;
+	if (!q->elevator) {
+		flush_rq->tag = first_rq->tag;
 
-		if (!q->elevator) {
-			fq->orig_rq = first_rq;
-			flush_rq->tag = first_rq->tag;
-			hctx = blk_mq_map_queue(q, first_rq->mq_ctx->cpu);
-			blk_mq_tag_set_rq(hctx, first_rq->tag, flush_rq);
-		} else {
-			flush_rq->internal_tag = first_rq->internal_tag;
-		}
-	}
+		/*
+		 * We borrow data request's driver tag, so have to mark
+		 * this flush request as INFLIGHT for avoiding double
+		 * account of this driver tag
+		 */
+		flush_rq->rq_flags |= RQF_MQ_INFLIGHT;
+	} else
+		flush_rq->internal_tag = first_rq->internal_tag;
 
 	flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH;
 	flush_rq->cmd_flags |= (flags & REQ_DRV) | (flags & REQ_FAILFAST_MASK);
 	flush_rq->rq_flags |= RQF_FLUSH_SEQ;
 	flush_rq->rq_disk = first_rq->rq_disk;
 	flush_rq->end_io = flush_end_io;
-
-	return blk_flush_queue_rq(flush_rq, false);
-}
-
-static void flush_data_end_io(struct request *rq, blk_status_t error)
-{
-	struct request_queue *q = rq->q;
-	struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
-
-	lockdep_assert_held(q->queue_lock);
-
 	/*
-	 * Updating q->in_flight[] here for making this tag usable
-	 * early. Because in blk_queue_start_tag(),
-	 * q->in_flight[BLK_RW_ASYNC] is used to limit async I/O and
-	 * reserve tags for sync I/O.
-	 *
-	 * More importantly this way can avoid the following I/O
-	 * deadlock:
-	 *
-	 * - suppose there are 40 fua requests comming to flush queue
-	 *   and queue depth is 31
-	 * - 30 rqs are scheduled then blk_queue_start_tag() can't alloc
-	 *   tag for async I/O any more
-	 * - all the 30 rqs are completed before FLUSH_PENDING_TIMEOUT
-	 *   and flush_data_end_io() is called
-	 * - the other rqs still can't go ahead if not updating
-	 *   q->in_flight[BLK_RW_ASYNC] here, meantime these rqs
-	 *   are held in flush data queue and make no progress of
-	 *   handling post flush rq
-	 * - only after the post flush rq is handled, all these rqs
-	 *   can be completed
+	 * Order WRITE ->end_io and WRITE rq->ref, and its pair is the one
+	 * implied in refcount_inc_not_zero() called from
+	 * blk_mq_find_and_get_req(), which orders WRITE/READ flush_rq->ref
+	 * and READ flush_rq->end_io
 	 */
+	smp_wmb();
+	refcount_set(&flush_rq->ref, 1);
 
-	elv_completed_request(q, rq);
-
-	/* for avoiding double accounting */
-	rq->rq_flags &= ~RQF_STARTED;
-
-	/*
-	 * After populating an empty queue, kick it to avoid stall. Read
-	 * the comment in flush_end_io().
-	 */
-	if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error))
-		blk_run_queue_async(q);
+	blk_flush_queue_rq(flush_rq, false);
 }
 
 static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
 {
 	struct request_queue *q = rq->q;
-	struct blk_mq_hw_ctx *hctx;
+	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
 	unsigned long flags;
 	struct blk_flush_queue *fq = blk_get_flush_queue(q, ctx);
 
-	hctx = blk_mq_map_queue(q, ctx->cpu);
-
 	if (q->elevator) {
 		WARN_ON(rq->tag < 0);
-		blk_mq_put_driver_tag_hctx(hctx, rq);
+		blk_mq_put_driver_tag(rq);
 	}
 
 	/*
@@ -453,9 +389,6 @@
 	unsigned int policy = blk_flush_policy(fflags, rq);
 	struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx);
 
-	if (!q->mq_ops)
-		lockdep_assert_held(q->queue_lock);
-
 	/*
 	 * @policy now records what operations need to be done. Adjust
 	 * REQ_PREFLUSH and FUA for the driver.
@@ -478,10 +411,7 @@
 	 * complete the request.
 	 */
 	if (!policy) {
-		if (q->mq_ops)
-			blk_mq_end_request(rq, 0);
-		else
-			__blk_end_request(rq, 0, 0);
+		blk_mq_end_request(rq, 0);
 		return;
 	}
 
@@ -494,10 +424,7 @@
 	 */
 	if ((policy & REQ_FSEQ_DATA) &&
 	    !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
-		if (q->mq_ops)
-			blk_mq_request_bypass_insert(rq, false);
-		else
-			list_add_tail(&rq->queuelist, &q->queue_head);
+		blk_mq_request_bypass_insert(rq, false, false);
 		return;
 	}
 
@@ -509,74 +436,39 @@
 	INIT_LIST_HEAD(&rq->flush.list);
 	rq->rq_flags |= RQF_FLUSH_SEQ;
 	rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
-	if (q->mq_ops) {
-		rq->end_io = mq_flush_data_end_io;
 
-		spin_lock_irq(&fq->mq_flush_lock);
-		blk_flush_complete_seq(rq, fq, REQ_FSEQ_ACTIONS & ~policy, 0);
-		spin_unlock_irq(&fq->mq_flush_lock);
-		return;
-	}
-	rq->end_io = flush_data_end_io;
+	rq->end_io = mq_flush_data_end_io;
 
+	spin_lock_irq(&fq->mq_flush_lock);
 	blk_flush_complete_seq(rq, fq, REQ_FSEQ_ACTIONS & ~policy, 0);
+	spin_unlock_irq(&fq->mq_flush_lock);
 }
 
 /**
  * blkdev_issue_flush - queue a flush
  * @bdev:	blockdev to issue flush for
  * @gfp_mask:	memory allocation flags (for bio_alloc)
- * @error_sector:	error sector
  *
  * Description:
- *    Issue a flush for the block device in question. Caller can supply
- *    room for storing the error offset in case of a flush error, if they
- *    wish to.
+ *    Issue a flush for the block device in question.
  */
-int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
-		sector_t *error_sector)
+int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask)
 {
-	struct request_queue *q;
 	struct bio *bio;
 	int ret = 0;
-
-	if (bdev->bd_disk == NULL)
-		return -ENXIO;
-
-	q = bdev_get_queue(bdev);
-	if (!q)
-		return -ENXIO;
-
-	/*
-	 * some block devices may not have their queue correctly set up here
-	 * (e.g. loop device without a backing file) and so issuing a flush
-	 * here will panic. Ensure there is a request function before issuing
-	 * the flush.
-	 */
-	if (!q->make_request_fn)
-		return -ENXIO;
 
 	bio = bio_alloc(gfp_mask, 0);
 	bio_set_dev(bio, bdev);
 	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
 
 	ret = submit_bio_wait(bio);
-
-	/*
-	 * The driver must store the error location in ->bi_sector, if
-	 * it supports it. For non-stacked drivers, this should be
-	 * copied from blk_rq_pos(rq).
-	 */
-	if (error_sector)
-		*error_sector = bio->bi_iter.bi_sector;
-
 	bio_put(bio);
 	return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_flush);
 
-struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
-		int node, int cmd_size, gfp_t flags)
+struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
+					      gfp_t flags)
 {
 	struct blk_flush_queue *fq;
 	int rq_sz = sizeof(struct request);
@@ -585,8 +477,7 @@
 	if (!fq)
 		goto fail;
 
-	if (q->mq_ops)
-		spin_lock_init(&fq->mq_flush_lock);
+	spin_lock_init(&fq->mq_flush_lock);
 
 	rq_sz = round_up(rq_sz + cmd_size, cache_line_size());
 	fq->flush_rq = kzalloc_node(rq_sz, flags, node);
@@ -596,6 +487,9 @@
 	INIT_LIST_HEAD(&fq->flush_queue[0]);
 	INIT_LIST_HEAD(&fq->flush_queue[1]);
 	INIT_LIST_HEAD(&fq->flush_data_in_flight);
+
+	lockdep_register_key(&fq->key);
+	lockdep_set_class(&fq->mq_flush_lock, &fq->key);
 
 	return fq;
 
@@ -611,6 +505,7 @@
 	if (!fq)
 		return;
 
+	lockdep_unregister_key(&fq->key);
 	kfree(fq->flush_rq);
 	kfree(fq);
 }
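
Note (not part of the patch): with the signature change above, blkdev_issue_flush() now takes only the block device and the allocation mask; the error_sector out-parameter is gone. Below is a minimal kernel-module sketch of a caller using the new two-argument form. The target path "/dev/sdb", the module name, and the error handling are illustrative assumptions; blkdev_get_by_path()/blkdev_put() are ordinary helpers of this kernel era, not something this patch adds.

// Hypothetical module-context sketch of the new two-argument
// blkdev_issue_flush(); device path and naming are illustrative.
#include <linux/module.h>
#include <linux/blkdev.h>

static int __init flush_demo_init(void)
{
	struct block_device *bdev;
	int err;

	/* Open a block device by path; write access is needed to flush it. */
	bdev = blkdev_get_by_path("/dev/sdb", FMODE_READ | FMODE_WRITE, NULL);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);

	/* New signature: no error_sector out-parameter any more. */
	err = blkdev_issue_flush(bdev, GFP_KERNEL);
	if (err)
		pr_warn("flush_demo: cache flush failed: %d\n", err);

	blkdev_put(bdev, FMODE_READ | FMODE_WRITE);
	return err;
}

static void __exit flush_demo_exit(void)
{
}

module_init(flush_demo_init);
module_exit(flush_demo_exit);
MODULE_LICENSE("GPL");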
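
Note (not part of the patch): the new blk_account_io_flush() charges STAT_FLUSH counts and latency to the disk's part0 statistics. On kernels that carry this accounting they surface as the flush fields described in Documentation/admin-guide/iostats.rst. The userspace sketch below assumes those are the last two columns of /proc/diskstats (flushes completed, then milliseconds spent flushing); verify the column layout against the documentation for your kernel before relying on it.

// Hypothetical userspace reader for the flush counters exposed via
// blk_account_io_flush(); assumes the flush fields are the last two
// columns of /proc/diskstats on kernels that carry this accounting.
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[512];
	FILE *f = fopen("/proc/diskstats", "r");

	if (!f) {
		perror("diskstats");
		return 1;
	}

	while (fgets(line, sizeof(line), f)) {
		char *tok, *name = NULL, *prev = NULL, *last = NULL;
		int field = 0;

		for (tok = strtok(line, " \t\n"); tok; tok = strtok(NULL, " \t\n")) {
			if (++field == 3)
				name = tok;	/* third column is the device name */
			prev = last;
			last = tok;
		}
		/* Only lines long enough to include the two flush columns. */
		if (field >= 20 && name && prev)
			printf("%s: %s flushes, %s ms flushing\n", name, prev, last);
	}
	fclose(f);
	return 0;
}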