2024-05-10 23fa18eaa71266feff7ba8d83022d9e1cc83c65a
kernel/fs/jbd2/transaction.c
@@ -63,9 +63,31 @@
 }

 /*
+ * Base amount of descriptor blocks we reserve for each transaction.
+ */
+static int jbd2_descriptor_blocks_per_trans(journal_t *journal)
+{
+ int tag_space = journal->j_blocksize - sizeof(journal_header_t);
+ int tags_per_block;
+
+ /* Subtract UUID */
+ tag_space -= 16;
+ if (jbd2_journal_has_csum_v2or3(journal))
+ tag_space -= sizeof(struct jbd2_journal_block_tail);
+ /* Commit code leaves a slack space of 16 bytes at the end of block */
+ tags_per_block = (tag_space - 16) / journal_tag_bytes(journal);
+ /*
+ * Revoke descriptors are accounted separately so we need to reserve
+ * space for commit block and normal transaction descriptor blocks.
+ */
+ return 1 + DIV_ROUND_UP(journal->j_max_transaction_buffers,
+ tags_per_block);
+}
+
+/*
 * jbd2_get_transaction: obtain a new transaction_t object.
 *
- * Simply allocate and initialise a new transaction. Create it in
+ * Simply initialise a new transaction. Initialize it in
 * RUNNING state and add it to the current journal (which should not
 * have an existing running transaction: we only make a new transaction
 * once we have started to commit the old one).
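To get a feel for what the new helper reserves, the standalone sketch below redoes the arithmetic for one assumed configuration (4096-byte journal blocks, a 12-byte journal_header_t, a 4-byte jbd2_journal_block_tail, 16-byte block tags and 8192 max transaction buffers; these sizes are illustrative assumptions, not values read from a running kernel):

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        int blocksize = 4096;                 /* assumed journal block size */
        int header = 12;                      /* assumed sizeof(journal_header_t) */
        int uuid = 16;                        /* UUID copied into the descriptor */
        int csum_tail = 4;                    /* assumed sizeof(struct jbd2_journal_block_tail) */
        int slack = 16;                       /* slack the commit code leaves at the end */
        int tag_bytes = 16;                   /* assumed journal_tag_bytes() result */
        int max_transaction_buffers = 8192;   /* assumed j_max_transaction_buffers */

        int tag_space = blocksize - header - uuid - csum_tail;
        int tags_per_block = (tag_space - slack) / tag_bytes;

        /* one commit block plus enough descriptor blocks to tag every buffer */
        int reserved = 1 + DIV_ROUND_UP(max_transaction_buffers, tags_per_block);

        printf("tags_per_block=%d reserved=%d\n", tags_per_block, reserved);
        return 0;
}

With these numbers tag_space is 4064, tags_per_block is 253, and the helper would reserve 1 + 33 = 34 descriptor/commit blocks per transaction.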
@@ -77,8 +99,8 @@
 *
 */

-static transaction_t *
-jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
+static void jbd2_get_transaction(journal_t *journal,
+ transaction_t *transaction)
 {
 transaction->t_journal = journal;
 transaction->t_state = T_RUNNING;
@@ -88,7 +110,9 @@
 spin_lock_init(&transaction->t_handle_lock);
 atomic_set(&transaction->t_updates, 0);
 atomic_set(&transaction->t_outstanding_credits,
+ jbd2_descriptor_blocks_per_trans(journal) +
 atomic_read(&journal->j_reserved_credits));
+ atomic_set(&transaction->t_outstanding_revokes, 0);
 atomic_set(&transaction->t_handle_count, 0);
 INIT_LIST_HEAD(&transaction->t_inode_list);
 INIT_LIST_HEAD(&transaction->t_private_list);
@@ -102,8 +126,6 @@
 transaction->t_max_wait = 0;
 transaction->t_start = jiffies;
 transaction->t_requested = 0;
-
- return transaction;
 }

 /*
....@@ -140,9 +162,9 @@
140162 }
141163
142164 /*
143
- * Wait until running transaction passes T_LOCKED state. Also starts the commit
144
- * if needed. The function expects running transaction to exist and releases
145
- * j_state_lock.
165
+ * Wait until running transaction passes to T_FLUSH state and new transaction
166
+ * can thus be started. Also starts the commit if needed. The function expects
167
+ * running transaction to exist and releases j_state_lock.
146168 */
147169 static void wait_transaction_locked(journal_t *journal)
148170 __releases(journal->j_state_lock)
....@@ -151,13 +173,41 @@
151173 int need_to_start;
152174 tid_t tid = journal->j_running_transaction->t_tid;
153175
154
- prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
176
+ prepare_to_wait_exclusive(&journal->j_wait_transaction_locked, &wait,
155177 TASK_UNINTERRUPTIBLE);
156178 need_to_start = !tid_geq(journal->j_commit_request, tid);
157179 read_unlock(&journal->j_state_lock);
158180 if (need_to_start)
159181 jbd2_log_start_commit(journal, tid);
160182 jbd2_might_wait_for_commit(journal);
183
+ schedule();
184
+ finish_wait(&journal->j_wait_transaction_locked, &wait);
185
+}
186
+
187
+/*
188
+ * Wait until running transaction transitions from T_SWITCH to T_FLUSH
189
+ * state and new transaction can thus be started. The function releases
190
+ * j_state_lock.
191
+ */
192
+static void wait_transaction_switching(journal_t *journal)
193
+ __releases(journal->j_state_lock)
194
+{
195
+ DEFINE_WAIT(wait);
196
+
197
+ if (WARN_ON(!journal->j_running_transaction ||
198
+ journal->j_running_transaction->t_state != T_SWITCH)) {
199
+ read_unlock(&journal->j_state_lock);
200
+ return;
201
+ }
202
+ prepare_to_wait_exclusive(&journal->j_wait_transaction_locked, &wait,
203
+ TASK_UNINTERRUPTIBLE);
204
+ read_unlock(&journal->j_state_lock);
205
+ /*
206
+ * We don't call jbd2_might_wait_for_commit() here as there's no
207
+ * waiting for outstanding handles happening anymore in T_SWITCH state
208
+ * and handling of reserved handles actually relies on that for
209
+ * correctness.
210
+ */
161211 schedule();
162212 finish_wait(&journal->j_wait_transaction_locked, &wait);
163213 }
....@@ -185,7 +235,8 @@
185235 * If the current transaction is locked down for commit, wait
186236 * for the lock to be released.
187237 */
188
- if (t->t_state == T_LOCKED) {
238
+ if (t->t_state != T_RUNNING) {
239
+ WARN_ON_ONCE(t->t_state >= T_FLUSH);
189240 wait_transaction_locked(journal);
190241 return 1;
191242 }
....@@ -233,12 +284,13 @@
233284 * *before* starting to dirty potentially checkpointed buffers
234285 * in the new transaction.
235286 */
236
- if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) {
287
+ if (jbd2_log_space_left(journal) < journal->j_max_transaction_buffers) {
237288 atomic_sub(total, &t->t_outstanding_credits);
238289 read_unlock(&journal->j_state_lock);
239290 jbd2_might_wait_for_commit(journal);
240291 write_lock(&journal->j_state_lock);
241
- if (jbd2_log_space_left(journal) < jbd2_space_needed(journal))
292
+ if (jbd2_log_space_left(journal) <
293
+ journal->j_max_transaction_buffers)
242294 __jbd2_log_wait_for_space(journal);
243295 write_unlock(&journal->j_state_lock);
244296 return 1;
....@@ -274,12 +326,12 @@
274326 gfp_t gfp_mask)
275327 {
276328 transaction_t *transaction, *new_transaction = NULL;
277
- int blocks = handle->h_buffer_credits;
329
+ int blocks = handle->h_total_credits;
278330 int rsv_blocks = 0;
279331 unsigned long ts = jiffies;
280332
281333 if (handle->h_rsv_handle)
282
- rsv_blocks = handle->h_rsv_handle->h_buffer_credits;
334
+ rsv_blocks = handle->h_rsv_handle->h_total_credits;
283335
284336 /*
285337 * Limit the number of reserved credits to 1/2 of maximum transaction
....@@ -297,7 +349,12 @@
297349 }
298350
299351 alloc_transaction:
300
- if (!journal->j_running_transaction) {
352
+ /*
353
+ * This check is racy but it is just an optimization of allocating new
354
+ * transaction early if there are high chances we'll need it. If we
355
+ * guess wrong, we'll retry or free unused transaction.
356
+ */
357
+ if (!data_race(journal->j_running_transaction)) {
301358 /*
302359 * If __GFP_FS is not present, then we may be being called from
303360 * inside the fs writeback layer, so we MUST NOT fail.
....@@ -362,8 +419,14 @@
362419 /*
363420 * We have handle reserved so we are allowed to join T_LOCKED
364421 * transaction and we don't have to check for transaction size
365
- * and journal space.
422
+ * and journal space. But we still have to wait while running
423
+ * transaction is being switched to a committing one as it
424
+ * won't wait for any handles anymore.
366425 */
426
+ if (transaction->t_state == T_SWITCH) {
427
+ wait_transaction_switching(journal);
428
+ goto repeat;
429
+ }
367430 sub_reserved_credits(journal, blocks);
368431 handle->h_reserved = 0;
369432 }
....@@ -374,6 +437,7 @@
374437 update_t_max_wait(transaction, ts);
375438 handle->h_transaction = transaction;
376439 handle->h_requested_credits = blocks;
440
+ handle->h_revoke_credits_requested = handle->h_revoke_credits;
377441 handle->h_start_jiffies = jiffies;
378442 atomic_inc(&transaction->t_updates);
379443 atomic_inc(&transaction->t_handle_count);
....@@ -400,15 +464,15 @@
400464 handle_t *handle = jbd2_alloc_handle(GFP_NOFS);
401465 if (!handle)
402466 return NULL;
403
- handle->h_buffer_credits = nblocks;
467
+ handle->h_total_credits = nblocks;
404468 handle->h_ref = 1;
405469
406470 return handle;
407471 }
408472
409473 handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks,
410
- gfp_t gfp_mask, unsigned int type,
411
- unsigned int line_no)
474
+ int revoke_records, gfp_t gfp_mask,
475
+ unsigned int type, unsigned int line_no)
412476 {
413477 handle_t *handle = journal_current_handle();
414478 int err;
@@ -422,6 +486,8 @@
 return handle;
 }

+ nblocks += DIV_ROUND_UP(revoke_records,
+ journal->j_revoke_records_per_block);
 handle = new_handle(nblocks);
 if (!handle)
 return ERR_PTR(-ENOMEM);
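From a caller's point of view the extra revoke_records argument is simply folded into the buffer credits as shown above. A minimal caller sketch (the journal pointer and the credit/record counts are placeholders, and the statistics type/line arguments are just 0 here):

handle_t *handle;

/* Ask for 20 buffer credits plus room for 100 revoke records. */
handle = jbd2__journal_start(journal, 20, 0, 100, GFP_NOFS, 0, 0);
if (IS_ERR(handle))
        return PTR_ERR(handle);
/*
 * Internally nblocks grows by DIV_ROUND_UP(100,
 * journal->j_revoke_records_per_block) before new_handle() runs.
 */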
....@@ -437,6 +503,7 @@
437503 rsv_handle->h_journal = journal;
438504 handle->h_rsv_handle = rsv_handle;
439505 }
506
+ handle->h_revoke_credits = revoke_records;
440507
441508 err = start_this_handle(journal, handle, gfp_mask);
442509 if (err < 0) {
....@@ -457,7 +524,7 @@
457524
458525
459526 /**
460
- * handle_t *jbd2_journal_start() - Obtain a new handle.
527
+ * jbd2_journal_start() - Obtain a new handle.
461528 * @journal: Journal to start transaction on.
462529 * @nblocks: number of block buffer we might modify
463530 *
....@@ -465,7 +532,7 @@
465532 * modified buffers in the log. We block until the log can guarantee
466533 * that much space. Additionally, if rsv_blocks > 0, we also create another
467534 * handle with rsv_blocks reserved blocks in the journal. This handle is
468
- * is stored in h_rsv_handle. It is not attached to any particular transaction
535
+ * stored in h_rsv_handle. It is not attached to any particular transaction
469536 * and thus doesn't block transaction commit. If the caller uses this reserved
470537 * handle, it has to set h_rsv_handle to NULL as otherwise jbd2_journal_stop()
471538 * on the parent handle will dispose the reserved one. Reserved handle has to
....@@ -477,22 +544,34 @@
477544 */
478545 handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
479546 {
480
- return jbd2__journal_start(journal, nblocks, 0, GFP_NOFS, 0, 0);
547
+ return jbd2__journal_start(journal, nblocks, 0, 0, GFP_NOFS, 0, 0);
481548 }
482549 EXPORT_SYMBOL(jbd2_journal_start);
550
+
551
+static void __jbd2_journal_unreserve_handle(handle_t *handle, transaction_t *t)
552
+{
553
+ journal_t *journal = handle->h_journal;
554
+
555
+ WARN_ON(!handle->h_reserved);
556
+ sub_reserved_credits(journal, handle->h_total_credits);
557
+ if (t)
558
+ atomic_sub(handle->h_total_credits, &t->t_outstanding_credits);
559
+}
483560
484561 void jbd2_journal_free_reserved(handle_t *handle)
485562 {
486563 journal_t *journal = handle->h_journal;
487564
488
- WARN_ON(!handle->h_reserved);
489
- sub_reserved_credits(journal, handle->h_buffer_credits);
565
+ /* Get j_state_lock to pin running transaction if it exists */
566
+ read_lock(&journal->j_state_lock);
567
+ __jbd2_journal_unreserve_handle(handle, journal->j_running_transaction);
568
+ read_unlock(&journal->j_state_lock);
490569 jbd2_free_handle(handle);
491570 }
492571 EXPORT_SYMBOL(jbd2_journal_free_reserved);
493572
494573 /**
495
- * int jbd2_journal_start_reserved() - start reserved handle
574
+ * jbd2_journal_start_reserved() - start reserved handle
496575 * @handle: handle to start
497576 * @type: for handle statistics
498577 * @line_no: for handle statistics
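The reserved-handle rules spelled out in the jbd2_journal_start() comment above translate into roughly the following caller pattern; this is a hedged sketch with illustrative credit counts and 0 for the statistics type/line arguments:

handle_t *handle, *rsv;
int err;

/* Start a normal handle and pre-reserve 4 credits for a later step. */
handle = jbd2__journal_start(journal, 16, 4, 0, GFP_NOFS, 0, 0);
if (IS_ERR(handle))
        return PTR_ERR(handle);
rsv = handle->h_rsv_handle;

/* ... use the normal handle ... */

/* Keep jbd2_journal_stop() from disposing of the reserved handle. */
handle->h_rsv_handle = NULL;
jbd2_journal_stop(handle);

/* Later: convert the reservation into a usable handle. */
err = jbd2_journal_start_reserved(rsv, 0, 0);

If the reservation turns out not to be needed, it is dropped with jbd2_journal_free_reserved(rsv) instead, which now pins the running transaction under j_state_lock so the reserved credits can be subtracted from t_outstanding_credits as well.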
....@@ -538,14 +617,18 @@
538617 }
539618 handle->h_type = type;
540619 handle->h_line_no = line_no;
620
+ trace_jbd2_handle_start(journal->j_fs_dev->bd_dev,
621
+ handle->h_transaction->t_tid, type,
622
+ line_no, handle->h_total_credits);
541623 return 0;
542624 }
543625 EXPORT_SYMBOL(jbd2_journal_start_reserved);
544626
545627 /**
546
- * int jbd2_journal_extend() - extend buffer credits.
628
+ * jbd2_journal_extend() - extend buffer credits.
547629 * @handle: handle to 'extend'
548630 * @nblocks: nr blocks to try to extend by.
631
+ * @revoke_records: number of revoke records to try to extend by.
549632 *
550633 * Some transactions, such as large extends and truncates, can be done
551634 * atomically all at once or in several stages. The operation requests
....@@ -562,7 +645,7 @@
562645 * return code < 0 implies an error
563646 * return code > 0 implies normal transaction-full status.
564647 */
565
-int jbd2_journal_extend(handle_t *handle, int nblocks)
648
+int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
566649 {
567650 transaction_t *transaction = handle->h_transaction;
568651 journal_t *journal;
@@ -584,6 +667,12 @@
 goto error_out;
 }

+ nblocks += DIV_ROUND_UP(
+ handle->h_revoke_credits_requested + revoke_records,
+ journal->j_revoke_records_per_block) -
+ DIV_ROUND_UP(
+ handle->h_revoke_credits_requested,
+ journal->j_revoke_records_per_block);
 spin_lock(&transaction->t_handle_lock);
 wanted = atomic_add_return(nblocks,
 &transaction->t_outstanding_credits);
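The extension only pays for descriptor blocks that the extra revoke records add on top of what the handle already requested. A small sketch of the same delta, using a hypothetical helper name and an assumed j_revoke_records_per_block of 1000 (the real value depends on block size and checksum format):

/* Hypothetical helper mirroring the computation above. */
static int extra_revoke_descriptors(int already_requested, int extra, int rr_per_blk)
{
        return DIV_ROUND_UP(already_requested + extra, rr_per_blk) -
               DIV_ROUND_UP(already_requested, rr_per_blk);
}

/*
 * extra_revoke_descriptors(10, 1500, 1000) == 2 - 1 == 1, so asking for
 * 1500 more revoke records adds just one descriptor-block credit to
 * nblocks before trying to grow t_outstanding_credits.
 */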
....@@ -595,22 +684,16 @@
595684 goto unlock;
596685 }
597686
598
- if (wanted + (wanted >> JBD2_CONTROL_BLOCKS_SHIFT) >
599
- jbd2_log_space_left(journal)) {
600
- jbd_debug(3, "denied handle %p %d blocks: "
601
- "insufficient log space\n", handle, nblocks);
602
- atomic_sub(nblocks, &transaction->t_outstanding_credits);
603
- goto unlock;
604
- }
605
-
606687 trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev,
607688 transaction->t_tid,
608689 handle->h_type, handle->h_line_no,
609
- handle->h_buffer_credits,
690
+ handle->h_total_credits,
610691 nblocks);
611692
612
- handle->h_buffer_credits += nblocks;
693
+ handle->h_total_credits += nblocks;
613694 handle->h_requested_credits += nblocks;
695
+ handle->h_revoke_credits += revoke_records;
696
+ handle->h_revoke_credits_requested += revoke_records;
614697 result = 0;
615698
616699 jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
@@ -621,11 +704,56 @@
 return result;
 }

+static void stop_this_handle(handle_t *handle)
+{
+ transaction_t *transaction = handle->h_transaction;
+ journal_t *journal = transaction->t_journal;
+ int revokes;
+
+ J_ASSERT(journal_current_handle() == handle);
+ J_ASSERT(atomic_read(&transaction->t_updates) > 0);
+ current->journal_info = NULL;
+ /*
+ * Subtract necessary revoke descriptor blocks from handle credits. We
+ * take care to account only for revoke descriptor blocks the
+ * transaction will really need as large sequences of transactions with
+ * small numbers of revokes are relatively common.
+ */
+ revokes = handle->h_revoke_credits_requested - handle->h_revoke_credits;
+ if (revokes) {
+ int t_revokes, revoke_descriptors;
+ int rr_per_blk = journal->j_revoke_records_per_block;
+
+ WARN_ON_ONCE(DIV_ROUND_UP(revokes, rr_per_blk)
+ > handle->h_total_credits);
+ t_revokes = atomic_add_return(revokes,
+ &transaction->t_outstanding_revokes);
+ revoke_descriptors =
+ DIV_ROUND_UP(t_revokes, rr_per_blk) -
+ DIV_ROUND_UP(t_revokes - revokes, rr_per_blk);
+ handle->h_total_credits -= revoke_descriptors;
+ }
+ atomic_sub(handle->h_total_credits,
+ &transaction->t_outstanding_credits);
+ if (handle->h_rsv_handle)
+ __jbd2_journal_unreserve_handle(handle->h_rsv_handle,
+ transaction);
+ if (atomic_dec_and_test(&transaction->t_updates))
+ wake_up(&journal->j_wait_updates);
+
+ rwsem_release(&journal->j_trans_commit_map, _THIS_IP_);
+ /*
+ * Scope of the GFP_NOFS context is over here and so we can restore the
+ * original alloc context.
+ */
+ memalloc_nofs_restore(handle->saved_alloc_context);
+}

 /**
- * int jbd2_journal_restart() - restart a handle .
+ * jbd2__journal_restart() - restart a handle .
 * @handle: handle to restart
 * @nblocks: nr credits requested
+ * @revoke_records: number of revoke record credits requested
 * @gfp_mask: memory allocation flags (for start_this_handle)
 *
 * Restart a handle for a multi-transaction filesystem
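The same marginal accounting shows up in stop_this_handle() above: only the descriptor blocks that this handle's revokes actually push the transaction into needing stay charged to t_outstanding_credits. A worked example, again with an assumed j_revoke_records_per_block of 1000 (illustrative only):

        t_outstanding_revokes before the handle:  900
        revokes used by this handle:              300  (h_revoke_credits_requested - h_revoke_credits)
        t_revokes after atomic_add_return():     1200

        revoke_descriptors = DIV_ROUND_UP(1200, 1000) - DIV_ROUND_UP(900, 1000)
                           = 2 - 1 = 1

so one credit is held back from h_total_credits before the unused credits are returned to the transaction, and a handle whose revokes fit into an already-charged descriptor block holds back nothing extra.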
....@@ -638,56 +766,48 @@
638766 * credits. We preserve reserved handle if there's any attached to the
639767 * passed in handle.
640768 */
641
-int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask)
769
+int jbd2__journal_restart(handle_t *handle, int nblocks, int revoke_records,
770
+ gfp_t gfp_mask)
642771 {
643772 transaction_t *transaction = handle->h_transaction;
644773 journal_t *journal;
645774 tid_t tid;
646
- int need_to_start, ret;
775
+ int need_to_start;
776
+ int ret;
647777
648778 /* If we've had an abort of any type, don't even think about
649779 * actually doing the restart! */
650780 if (is_handle_aborted(handle))
651781 return 0;
652782 journal = transaction->t_journal;
783
+ tid = transaction->t_tid;
653784
654785 /*
655786 * First unlink the handle from its current transaction, and start the
656787 * commit on that.
657788 */
658
- J_ASSERT(atomic_read(&transaction->t_updates) > 0);
659
- J_ASSERT(journal_current_handle() == handle);
660
-
661
- read_lock(&journal->j_state_lock);
662
- spin_lock(&transaction->t_handle_lock);
663
- atomic_sub(handle->h_buffer_credits,
664
- &transaction->t_outstanding_credits);
665
- if (handle->h_rsv_handle) {
666
- sub_reserved_credits(journal,
667
- handle->h_rsv_handle->h_buffer_credits);
668
- }
669
- if (atomic_dec_and_test(&transaction->t_updates))
670
- wake_up(&journal->j_wait_updates);
671
- tid = transaction->t_tid;
672
- spin_unlock(&transaction->t_handle_lock);
673
- handle->h_transaction = NULL;
674
- current->journal_info = NULL;
675
-
676789 jbd_debug(2, "restarting handle %p\n", handle);
790
+ stop_this_handle(handle);
791
+ handle->h_transaction = NULL;
792
+
793
+ /*
794
+ * TODO: If we use READ_ONCE / WRITE_ONCE for j_commit_request we can
795
+ * get rid of pointless j_state_lock traffic like this.
796
+ */
797
+ read_lock(&journal->j_state_lock);
677798 need_to_start = !tid_geq(journal->j_commit_request, tid);
678799 read_unlock(&journal->j_state_lock);
679800 if (need_to_start)
680801 jbd2_log_start_commit(journal, tid);
681
-
682
- rwsem_release(&journal->j_trans_commit_map, 1, _THIS_IP_);
683
- handle->h_buffer_credits = nblocks;
684
- /*
685
- * Restore the original nofs context because the journal restart
686
- * is basically the same thing as journal stop and start.
687
- * start_this_handle will start a new nofs context.
688
- */
689
- memalloc_nofs_restore(handle->saved_alloc_context);
802
+ handle->h_total_credits = nblocks +
803
+ DIV_ROUND_UP(revoke_records,
804
+ journal->j_revoke_records_per_block);
805
+ handle->h_revoke_credits = revoke_records;
690806 ret = start_this_handle(journal, handle, gfp_mask);
807
+ trace_jbd2_handle_restart(journal->j_fs_dev->bd_dev,
808
+ ret ? 0 : handle->h_transaction->t_tid,
809
+ handle->h_type, handle->h_line_no,
810
+ handle->h_total_credits);
691811 return ret;
692812 }
693813 EXPORT_SYMBOL(jbd2__journal_restart);
....@@ -695,12 +815,12 @@
695815
696816 int jbd2_journal_restart(handle_t *handle, int nblocks)
697817 {
698
- return jbd2__journal_restart(handle, nblocks, GFP_NOFS);
818
+ return jbd2__journal_restart(handle, nblocks, 0, GFP_NOFS);
699819 }
700820 EXPORT_SYMBOL(jbd2_journal_restart);
701821
702822 /**
703
- * void jbd2_journal_lock_updates () - establish a transaction barrier.
823
+ * jbd2_journal_lock_updates () - establish a transaction barrier.
704824 * @journal: Journal to establish a barrier on.
705825 *
706826 * This locks out any further updates from being started, and blocks
....@@ -759,7 +879,7 @@
759879 }
760880
761881 /**
762
- * void jbd2_journal_unlock_updates (journal_t* journal) - release barrier
882
+ * jbd2_journal_unlock_updates () - release barrier
763883 * @journal: Journal to release the barrier on.
764884 *
765885 * Release a transaction barrier obtained with jbd2_journal_lock_updates().
....@@ -774,7 +894,7 @@
774894 write_lock(&journal->j_state_lock);
775895 --journal->j_barrier_count;
776896 write_unlock(&journal->j_state_lock);
777
- wake_up(&journal->j_wait_transaction_locked);
897
+ wake_up_all(&journal->j_wait_transaction_locked);
778898 }
779899
780900 static void warn_dirty_buffer(struct buffer_head *bh)
....@@ -843,7 +963,7 @@
843963
844964 start_lock = jiffies;
845965 lock_buffer(bh);
846
- jbd_lock_bh_state(bh);
966
+ spin_lock(&jh->b_state_lock);
847967
848968 /* If it takes too long to lock the buffer, trace it */
849969 time_lock = jbd2_time_diff(start_lock, jiffies);
....@@ -864,36 +984,28 @@
864984 * ie. locked but not dirty) or tune2fs (which may actually have
865985 * the buffer dirtied, ugh.) */
866986
867
- if (buffer_dirty(bh)) {
987
+ if (buffer_dirty(bh) && jh->b_transaction) {
988
+ warn_dirty_buffer(bh);
868989 /*
869
- * First question: is this buffer already part of the current
870
- * transaction or the existing committing transaction?
871
- */
872
- if (jh->b_transaction) {
873
- J_ASSERT_JH(jh,
874
- jh->b_transaction == transaction ||
875
- jh->b_transaction ==
876
- journal->j_committing_transaction);
877
- if (jh->b_next_transaction)
878
- J_ASSERT_JH(jh, jh->b_next_transaction ==
879
- transaction);
880
- warn_dirty_buffer(bh);
881
- }
882
- /*
883
- * In any case we need to clean the dirty flag and we must
884
- * do it under the buffer lock to be sure we don't race
885
- * with running write-out.
990
+ * We need to clean the dirty flag and we must do it under the
991
+ * buffer lock to be sure we don't race with running write-out.
886992 */
887993 JBUFFER_TRACE(jh, "Journalling dirty buffer");
888994 clear_buffer_dirty(bh);
995
+ /*
996
+ * The buffer is going to be added to BJ_Reserved list now and
997
+ * nothing guarantees jbd2_journal_dirty_metadata() will be
998
+ * ever called for it. So we need to set jbddirty bit here to
999
+ * make sure the buffer is dirtied and written out when the
1000
+ * journaling machinery is done with it.
1001
+ */
8891002 set_buffer_jbddirty(bh);
8901003 }
8911004
892
- unlock_buffer(bh);
893
-
8941005 error = -EROFS;
8951006 if (is_handle_aborted(handle)) {
896
- jbd_unlock_bh_state(bh);
1007
+ spin_unlock(&jh->b_state_lock);
1008
+ unlock_buffer(bh);
8971009 goto out;
8981010 }
8991011 error = 0;
....@@ -903,14 +1015,16 @@
9031015 * b_next_transaction points to it
9041016 */
9051017 if (jh->b_transaction == transaction ||
906
- jh->b_next_transaction == transaction)
1018
+ jh->b_next_transaction == transaction) {
1019
+ unlock_buffer(bh);
9071020 goto done;
1021
+ }
9081022
9091023 /*
9101024 * this is the first time this transaction is touching this buffer,
9111025 * reset the modified flag
9121026 */
913
- jh->b_modified = 0;
1027
+ jh->b_modified = 0;
9141028
9151029 /*
9161030 * If the buffer is not journaled right now, we need to make sure it
....@@ -928,10 +1042,24 @@
9281042 */
9291043 smp_wmb();
9301044 spin_lock(&journal->j_list_lock);
1045
+ if (test_clear_buffer_dirty(bh)) {
1046
+ /*
1047
+ * Execute buffer dirty clearing and jh->b_transaction
1048
+ * assignment under journal->j_list_lock locked to
1049
+ * prevent bh being removed from checkpoint list if
1050
+ * the buffer is in an intermediate state (not dirty
1051
+ * and jh->b_transaction is NULL).
1052
+ */
1053
+ JBUFFER_TRACE(jh, "Journalling dirty buffer");
1054
+ set_buffer_jbddirty(bh);
1055
+ }
9311056 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
9321057 spin_unlock(&journal->j_list_lock);
1058
+ unlock_buffer(bh);
9331059 goto done;
9341060 }
1061
+ unlock_buffer(bh);
1062
+
9351063 /*
9361064 * If there is already a copy-out version of this buffer, then we don't
9371065 * need to make another one
....@@ -957,7 +1085,7 @@
9571085 */
9581086 if (buffer_shadow(bh)) {
9591087 JBUFFER_TRACE(jh, "on shadow: sleep");
960
- jbd_unlock_bh_state(bh);
1088
+ spin_unlock(&jh->b_state_lock);
9611089 wait_on_bit_io(&bh->b_state, BH_Shadow, TASK_UNINTERRUPTIBLE);
9621090 goto repeat;
9631091 }
....@@ -978,7 +1106,7 @@
9781106 JBUFFER_TRACE(jh, "generate frozen data");
9791107 if (!frozen_buffer) {
9801108 JBUFFER_TRACE(jh, "allocate memory for buffer");
981
- jbd_unlock_bh_state(bh);
1109
+ spin_unlock(&jh->b_state_lock);
9821110 frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size,
9831111 GFP_NOFS | __GFP_NOFAIL);
9841112 goto repeat;
....@@ -997,7 +1125,7 @@
9971125 jh->b_next_transaction = transaction;
9981126
9991127 done:
1000
- jbd_unlock_bh_state(bh);
1128
+ spin_unlock(&jh->b_state_lock);
10011129
10021130 /*
10031131 * If we are about to journal a buffer, then any revoke pending on it is
....@@ -1067,7 +1195,8 @@
10671195 }
10681196
10691197 /**
1070
- * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
1198
+ * jbd2_journal_get_write_access() - notify intent to modify a buffer
1199
+ * for metadata (not data) update.
10711200 * @handle: transaction to add buffer modifications to
10721201 * @bh: bh to be used for metadata writes
10731202 *
....@@ -1111,7 +1240,7 @@
11111240 * unlocked buffer beforehand. */
11121241
11131242 /**
1114
- * int jbd2_journal_get_create_access () - notify intent to use newly created bh
1243
+ * jbd2_journal_get_create_access () - notify intent to use newly created bh
11151244 * @handle: transaction to new buffer to
11161245 * @bh: new buffer.
11171246 *
....@@ -1139,7 +1268,7 @@
11391268 * that case: the transaction must have deleted the buffer for it to be
11401269 * reused here.
11411270 */
1142
- jbd_lock_bh_state(bh);
1271
+ spin_lock(&jh->b_state_lock);
11431272 J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
11441273 jh->b_transaction == NULL ||
11451274 (jh->b_transaction == journal->j_committing_transaction &&
....@@ -1174,7 +1303,7 @@
11741303 jh->b_next_transaction = transaction;
11751304 spin_unlock(&journal->j_list_lock);
11761305 }
1177
- jbd_unlock_bh_state(bh);
1306
+ spin_unlock(&jh->b_state_lock);
11781307
11791308 /*
11801309 * akpm: I added this. ext3_alloc_branch can pick up new indirect
....@@ -1191,7 +1320,7 @@
11911320 }
11921321
11931322 /**
1194
- * int jbd2_journal_get_undo_access() - Notify intent to modify metadata with
1323
+ * jbd2_journal_get_undo_access() - Notify intent to modify metadata with
11951324 * non-rewindable consequences
11961325 * @handle: transaction
11971326 * @bh: buffer to undo
....@@ -1245,13 +1374,13 @@
12451374 committed_data = jbd2_alloc(jh2bh(jh)->b_size,
12461375 GFP_NOFS|__GFP_NOFAIL);
12471376
1248
- jbd_lock_bh_state(bh);
1377
+ spin_lock(&jh->b_state_lock);
12491378 if (!jh->b_committed_data) {
12501379 /* Copy out the current buffer contents into the
12511380 * preserved, committed copy. */
12521381 JBUFFER_TRACE(jh, "generate b_committed data");
12531382 if (!committed_data) {
1254
- jbd_unlock_bh_state(bh);
1383
+ spin_unlock(&jh->b_state_lock);
12551384 goto repeat;
12561385 }
12571386
....@@ -1259,7 +1388,7 @@
12591388 committed_data = NULL;
12601389 memcpy(jh->b_committed_data, bh->b_data, bh->b_size);
12611390 }
1262
- jbd_unlock_bh_state(bh);
1391
+ spin_unlock(&jh->b_state_lock);
12631392 out:
12641393 jbd2_journal_put_journal_head(jh);
12651394 if (unlikely(committed_data))
....@@ -1268,7 +1397,7 @@
12681397 }
12691398
12701399 /**
1271
- * void jbd2_journal_set_triggers() - Add triggers for commit writeout
1400
+ * jbd2_journal_set_triggers() - Add triggers for commit writeout
12721401 * @bh: buffer to trigger on
12731402 * @type: struct jbd2_buffer_trigger_type containing the trigger(s).
12741403 *
....@@ -1310,7 +1439,7 @@
13101439 }
13111440
13121441 /**
1313
- * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
1442
+ * jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
13141443 * @handle: transaction to add buffer to.
13151444 * @bh: buffer to mark
13161445 *
....@@ -1339,8 +1468,6 @@
13391468 struct journal_head *jh;
13401469 int ret = 0;
13411470
1342
- if (is_handle_aborted(handle))
1343
- return -EROFS;
13441471 if (!buffer_jbd(bh))
13451472 return -EUCLEAN;
13461473
....@@ -1358,18 +1485,18 @@
13581485 * crucial to catch bugs so let's do a reliable check until the
13591486 * lockless handling is fully proven.
13601487 */
1361
- if (jh->b_transaction != transaction &&
1362
- jh->b_next_transaction != transaction) {
1363
- jbd_lock_bh_state(bh);
1488
+ if (data_race(jh->b_transaction != transaction &&
1489
+ jh->b_next_transaction != transaction)) {
1490
+ spin_lock(&jh->b_state_lock);
13641491 J_ASSERT_JH(jh, jh->b_transaction == transaction ||
13651492 jh->b_next_transaction == transaction);
1366
- jbd_unlock_bh_state(bh);
1493
+ spin_unlock(&jh->b_state_lock);
13671494 }
13681495 if (jh->b_modified == 1) {
13691496 /* If it's in our transaction it must be in BJ_Metadata list. */
1370
- if (jh->b_transaction == transaction &&
1371
- jh->b_jlist != BJ_Metadata) {
1372
- jbd_lock_bh_state(bh);
1497
+ if (data_race(jh->b_transaction == transaction &&
1498
+ jh->b_jlist != BJ_Metadata)) {
1499
+ spin_lock(&jh->b_state_lock);
13731500 if (jh->b_transaction == transaction &&
13741501 jh->b_jlist != BJ_Metadata)
13751502 pr_err("JBD2: assertion failure: h_type=%u "
....@@ -1379,13 +1506,25 @@
13791506 jh->b_jlist);
13801507 J_ASSERT_JH(jh, jh->b_transaction != transaction ||
13811508 jh->b_jlist == BJ_Metadata);
1382
- jbd_unlock_bh_state(bh);
1509
+ spin_unlock(&jh->b_state_lock);
13831510 }
13841511 goto out;
13851512 }
13861513
13871514 journal = transaction->t_journal;
1388
- jbd_lock_bh_state(bh);
1515
+ spin_lock(&jh->b_state_lock);
1516
+
1517
+ if (is_handle_aborted(handle)) {
1518
+ /*
1519
+ * Check journal aborting with @jh->b_state_lock locked,
1520
+ * since 'jh->b_transaction' could be replaced with
1521
+ * 'jh->b_next_transaction' during old transaction
1522
+ * committing if journal aborted, which may fail
1523
+ * assertion on 'jh->b_frozen_data == NULL'.
1524
+ */
1525
+ ret = -EROFS;
1526
+ goto out_unlock_bh;
1527
+ }
13891528
13901529 if (jh->b_modified == 0) {
13911530 /*
....@@ -1393,12 +1532,12 @@
13931532 * of the transaction. This needs to be done
13941533 * once a transaction -bzzz
13951534 */
1396
- if (handle->h_buffer_credits <= 0) {
1535
+ if (WARN_ON_ONCE(jbd2_handle_buffer_credits(handle) <= 0)) {
13971536 ret = -ENOSPC;
13981537 goto out_unlock_bh;
13991538 }
14001539 jh->b_modified = 1;
1401
- handle->h_buffer_credits--;
1540
+ handle->h_total_credits--;
14021541 }
14031542
14041543 /*
....@@ -1471,14 +1610,14 @@
14711610 __jbd2_journal_file_buffer(jh, transaction, BJ_Metadata);
14721611 spin_unlock(&journal->j_list_lock);
14731612 out_unlock_bh:
1474
- jbd_unlock_bh_state(bh);
1613
+ spin_unlock(&jh->b_state_lock);
14751614 out:
14761615 JBUFFER_TRACE(jh, "exit");
14771616 return ret;
14781617 }
14791618
14801619 /**
1481
- * void jbd2_journal_forget() - bforget() for potentially-journaled buffers.
1620
+ * jbd2_journal_forget() - bforget() for potentially-journaled buffers.
14821621 * @handle: transaction handle
14831622 * @bh: bh to 'forget'
14841623 *
....@@ -1494,7 +1633,7 @@
14941633 * Allow this call even if the handle has aborted --- it may be part of
14951634 * the caller's cleanup after an abort.
14961635 */
1497
-int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1636
+int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
14981637 {
14991638 transaction_t *transaction = handle->h_transaction;
15001639 journal_t *journal;
....@@ -1509,18 +1648,20 @@
15091648
15101649 BUFFER_TRACE(bh, "entry");
15111650
1512
- jbd_lock_bh_state(bh);
1651
+ jh = jbd2_journal_grab_journal_head(bh);
1652
+ if (!jh) {
1653
+ __bforget(bh);
1654
+ return 0;
1655
+ }
15131656
1514
- if (!buffer_jbd(bh))
1515
- goto not_jbd;
1516
- jh = bh2jh(bh);
1657
+ spin_lock(&jh->b_state_lock);
15171658
15181659 /* Critical error: attempting to delete a bitmap buffer, maybe?
15191660 * Don't do any jbd operations, and return an error. */
15201661 if (!J_EXPECT_JH(jh, !jh->b_committed_data,
15211662 "inconsistent data on disk")) {
15221663 err = -EIO;
1523
- goto not_jbd;
1664
+ goto drop;
15241665 }
15251666
15261667 /* keep track of whether or not this transaction modified us */
....@@ -1568,12 +1709,7 @@
15681709 __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
15691710 } else {
15701711 __jbd2_journal_unfile_buffer(jh);
1571
- if (!buffer_jbd(bh)) {
1572
- spin_unlock(&journal->j_list_lock);
1573
- jbd_unlock_bh_state(bh);
1574
- __bforget(bh);
1575
- goto drop;
1576
- }
1712
+ jbd2_journal_put_journal_head(jh);
15771713 }
15781714 spin_unlock(&journal->j_list_lock);
15791715 } else if (jh->b_transaction) {
....@@ -1605,21 +1741,52 @@
16051741 if (was_modified)
16061742 drop_reserve = 1;
16071743 }
1608
- }
1744
+ } else {
1745
+ /*
1746
+ * Finally, if the buffer is not belongs to any
1747
+ * transaction, we can just drop it now if it has no
1748
+ * checkpoint.
1749
+ */
1750
+ spin_lock(&journal->j_list_lock);
1751
+ if (!jh->b_cp_transaction) {
1752
+ JBUFFER_TRACE(jh, "belongs to none transaction");
1753
+ spin_unlock(&journal->j_list_lock);
1754
+ goto drop;
1755
+ }
16091756
1610
-not_jbd:
1611
- jbd_unlock_bh_state(bh);
1612
- __brelse(bh);
1757
+ /*
1758
+ * Otherwise, if the buffer has been written to disk,
1759
+ * it is safe to remove the checkpoint and drop it.
1760
+ */
1761
+ if (!buffer_dirty(bh)) {
1762
+ __jbd2_journal_remove_checkpoint(jh);
1763
+ spin_unlock(&journal->j_list_lock);
1764
+ goto drop;
1765
+ }
1766
+
1767
+ /*
1768
+ * The buffer is still not written to disk, we should
1769
+ * attach this buffer to current transaction so that the
1770
+ * buffer can be checkpointed only after the current
1771
+ * transaction commits.
1772
+ */
1773
+ clear_buffer_dirty(bh);
1774
+ __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
1775
+ spin_unlock(&journal->j_list_lock);
1776
+ }
16131777 drop:
1778
+ __brelse(bh);
1779
+ spin_unlock(&jh->b_state_lock);
1780
+ jbd2_journal_put_journal_head(jh);
16141781 if (drop_reserve) {
16151782 /* no need to reserve log space for this block -bzzz */
1616
- handle->h_buffer_credits++;
1783
+ handle->h_total_credits++;
16171784 }
16181785 return err;
16191786 }
16201787
16211788 /**
1622
- * int jbd2_journal_stop() - complete a transaction
1789
+ * jbd2_journal_stop() - complete a transaction
16231790 * @handle: transaction to complete.
16241791 *
16251792 * All done for a particular handle.
....@@ -1642,45 +1809,34 @@
16421809 tid_t tid;
16431810 pid_t pid;
16441811
1812
+ if (--handle->h_ref > 0) {
1813
+ jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
1814
+ handle->h_ref);
1815
+ if (is_handle_aborted(handle))
1816
+ return -EIO;
1817
+ return 0;
1818
+ }
16451819 if (!transaction) {
16461820 /*
1647
- * Handle is already detached from the transaction so
1648
- * there is nothing to do other than decrease a refcount,
1649
- * or free the handle if refcount drops to zero
1821
+ * Handle is already detached from the transaction so there is
1822
+ * nothing to do other than free the handle.
16501823 */
1651
- if (--handle->h_ref > 0) {
1652
- jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
1653
- handle->h_ref);
1654
- return err;
1655
- } else {
1656
- if (handle->h_rsv_handle)
1657
- jbd2_free_handle(handle->h_rsv_handle);
1658
- goto free_and_exit;
1659
- }
1824
+ memalloc_nofs_restore(handle->saved_alloc_context);
1825
+ goto free_and_exit;
16601826 }
16611827 journal = transaction->t_journal;
1662
-
1663
- J_ASSERT(journal_current_handle() == handle);
1828
+ tid = transaction->t_tid;
16641829
16651830 if (is_handle_aborted(handle))
16661831 err = -EIO;
1667
- else
1668
- J_ASSERT(atomic_read(&transaction->t_updates) > 0);
1669
-
1670
- if (--handle->h_ref > 0) {
1671
- jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
1672
- handle->h_ref);
1673
- return err;
1674
- }
16751832
16761833 jbd_debug(4, "Handle %p going down\n", handle);
16771834 trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev,
1678
- transaction->t_tid,
1679
- handle->h_type, handle->h_line_no,
1835
+ tid, handle->h_type, handle->h_line_no,
16801836 jiffies - handle->h_start_jiffies,
16811837 handle->h_sync, handle->h_requested_credits,
16821838 (handle->h_requested_credits -
1683
- handle->h_buffer_credits));
1839
+ handle->h_total_credits));
16841840
16851841 /*
16861842 * Implement synchronous transaction batching. If the handle
....@@ -1740,19 +1896,13 @@
17401896
17411897 if (handle->h_sync)
17421898 transaction->t_synchronous_commit = 1;
1743
- current->journal_info = NULL;
1744
- atomic_sub(handle->h_buffer_credits,
1745
- &transaction->t_outstanding_credits);
17461899
17471900 /*
17481901 * If the handle is marked SYNC, we need to set another commit
1749
- * going! We also want to force a commit if the current
1750
- * transaction is occupying too much of the log, or if the
1751
- * transaction is too old now.
1902
+ * going! We also want to force a commit if the transaction is too
1903
+ * old now.
17521904 */
17531905 if (handle->h_sync ||
1754
- (atomic_read(&transaction->t_outstanding_credits) >
1755
- journal->j_max_transaction_buffers) ||
17561906 time_after_eq(jiffies, transaction->t_expires)) {
17571907 /* Do this even for aborted journals: an abort still
17581908 * completes the commit thread, it just doesn't write
....@@ -1761,7 +1911,7 @@
17611911 jbd_debug(2, "transaction too old, requesting commit for "
17621912 "handle %p\n", handle);
17631913 /* This is non-blocking */
1764
- jbd2_log_start_commit(journal, transaction->t_tid);
1914
+ jbd2_log_start_commit(journal, tid);
17651915
17661916 /*
17671917 * Special case: JBD2_SYNC synchronous updates require us
....@@ -1772,31 +1922,19 @@
17721922 }
17731923
17741924 /*
1775
- * Once we drop t_updates, if it goes to zero the transaction
1776
- * could start committing on us and eventually disappear. So
1777
- * once we do this, we must not dereference transaction
1778
- * pointer again.
1925
+ * Once stop_this_handle() drops t_updates, the transaction could start
1926
+ * committing on us and eventually disappear. So we must not
1927
+ * dereference transaction pointer again after calling
1928
+ * stop_this_handle().
17791929 */
1780
- tid = transaction->t_tid;
1781
- if (atomic_dec_and_test(&transaction->t_updates)) {
1782
- wake_up(&journal->j_wait_updates);
1783
- if (journal->j_barrier_count)
1784
- wake_up(&journal->j_wait_transaction_locked);
1785
- }
1786
-
1787
- rwsem_release(&journal->j_trans_commit_map, 1, _THIS_IP_);
1930
+ stop_this_handle(handle);
17881931
17891932 if (wait_for_commit)
17901933 err = jbd2_log_wait_commit(journal, tid);
17911934
1792
- if (handle->h_rsv_handle)
1793
- jbd2_journal_free_reserved(handle->h_rsv_handle);
17941935 free_and_exit:
1795
- /*
1796
- * Scope of the GFP_NOFS context is over here and so we can restore the
1797
- * original alloc context.
1798
- */
1799
- memalloc_nofs_restore(handle->saved_alloc_context);
1936
+ if (handle->h_rsv_handle)
1937
+ jbd2_free_handle(handle->h_rsv_handle);
18001938 jbd2_free_handle(handle);
18011939 return err;
18021940 }
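With jbd2_journal_stop() now delegating the teardown to stop_this_handle(), a typical handle's credit flow looks roughly like the sketch below (the journal and buffer_head pointers, the counts and the trimmed error handling are illustrative; only calls that appear in this patch are used):

handle = jbd2__journal_start(journal, 8, 0, 16, GFP_NOFS, 0, 0);
if (IS_ERR(handle))
        return PTR_ERR(handle);

err = jbd2_journal_get_write_access(handle, bh);
if (!err)
        err = jbd2_journal_dirty_metadata(handle, bh);  /* consumes one h_total_credits */

/* Running low? Ask for 4 more buffer credits and 8 more revoke records. */
if (jbd2_handle_buffer_credits(handle) < 4) {
        ret = jbd2_journal_extend(handle, 4, 8);
        if (ret > 0)                            /* transaction-full status */
                ret = jbd2_journal_restart(handle, 8);
}

err = jbd2_journal_stop(handle);        /* stop_this_handle() returns the unused credits */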
....@@ -1814,7 +1952,7 @@
18141952 *
18151953 * j_list_lock is held.
18161954 *
1817
- * jbd_lock_bh_state(jh2bh(jh)) is held.
1955
+ * jh->b_state_lock is held.
18181956 */
18191957
18201958 static inline void
....@@ -1838,7 +1976,7 @@
18381976 *
18391977 * Called with j_list_lock held, and the journal may not be locked.
18401978 *
1841
- * jbd_lock_bh_state(jh2bh(jh)) is held.
1979
+ * jh->b_state_lock is held.
18421980 */
18431981
18441982 static inline void
....@@ -1870,7 +2008,7 @@
18702008 transaction_t *transaction;
18712009 struct buffer_head *bh = jh2bh(jh);
18722010
1873
- J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
2011
+ lockdep_assert_held(&jh->b_state_lock);
18742012 transaction = jh->b_transaction;
18752013 if (transaction)
18762014 assert_spin_locked(&transaction->t_journal->j_list_lock);
....@@ -1907,11 +2045,10 @@
19072045 }
19082046
19092047 /*
1910
- * Remove buffer from all transactions.
2048
+ * Remove buffer from all transactions. The caller is responsible for dropping
2049
+ * the jh reference that belonged to the transaction.
19112050 *
19122051 * Called with bh_state lock and j_list_lock
1913
- *
1914
- * jh and bh may be already freed when this function returns.
19152052 */
19162053 static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
19172054 {
....@@ -1920,7 +2057,6 @@
19202057
19212058 __jbd2_journal_temp_unlink_buffer(jh);
19222059 jh->b_transaction = NULL;
1923
- jbd2_journal_put_journal_head(jh);
19242060 }
19252061
19262062 void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
....@@ -1929,18 +2065,19 @@
19292065
19302066 /* Get reference so that buffer cannot be freed before we unlock it */
19312067 get_bh(bh);
1932
- jbd_lock_bh_state(bh);
2068
+ spin_lock(&jh->b_state_lock);
19332069 spin_lock(&journal->j_list_lock);
19342070 __jbd2_journal_unfile_buffer(jh);
19352071 spin_unlock(&journal->j_list_lock);
1936
- jbd_unlock_bh_state(bh);
2072
+ spin_unlock(&jh->b_state_lock);
2073
+ jbd2_journal_put_journal_head(jh);
19372074 __brelse(bh);
19382075 }
19392076
19402077 /*
19412078 * Called from jbd2_journal_try_to_free_buffers().
19422079 *
1943
- * Called under jbd_lock_bh_state(bh)
2080
+ * Called under jh->b_state_lock
19442081 */
19452082 static void
19462083 __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
....@@ -1967,13 +2104,9 @@
19672104 }
19682105
19692106 /**
1970
- * int jbd2_journal_try_to_free_buffers() - try to free page buffers.
2107
+ * jbd2_journal_try_to_free_buffers() - try to free page buffers.
19712108 * @journal: journal for operation
19722109 * @page: to try and free
1973
- * @gfp_mask: we use the mask to detect how hard should we try to release
1974
- * buffers. If __GFP_DIRECT_RECLAIM and __GFP_FS is set, we wait for commit
1975
- * code to release the buffers.
1976
- *
19772110 *
19782111 * For all the buffers on this page,
19792112 * if they are fully written out ordered data, move them onto BUF_CLEAN
....@@ -2004,8 +2137,7 @@
20042137 *
20052138 * Return 0 on failure, 1 on success
20062139 */
2007
-int jbd2_journal_try_to_free_buffers(journal_t *journal,
2008
- struct page *page, gfp_t gfp_mask)
2140
+int jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page)
20092141 {
20102142 struct buffer_head *head;
20112143 struct buffer_head *bh;
....@@ -2028,10 +2160,10 @@
20282160 if (!jh)
20292161 continue;
20302162
2031
- jbd_lock_bh_state(bh);
2163
+ spin_lock(&jh->b_state_lock);
20322164 __journal_try_to_free_buffer(journal, bh);
2165
+ spin_unlock(&jh->b_state_lock);
20332166 jbd2_journal_put_journal_head(jh);
2034
- jbd_unlock_bh_state(bh);
20352167 if (buffer_jbd(bh))
20362168 goto busy;
20372169
....@@ -2067,7 +2199,7 @@
20672199 *
20682200 * Called under j_list_lock.
20692201 *
2070
- * Called under jbd_lock_bh_state(bh).
2202
+ * Called under jh->b_state_lock.
20712203 */
20722204 static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
20732205 {
....@@ -2088,6 +2220,7 @@
20882220 } else {
20892221 JBUFFER_TRACE(jh, "on running transaction");
20902222 __jbd2_journal_unfile_buffer(jh);
2223
+ jbd2_journal_put_journal_head(jh);
20912224 }
20922225 return may_free;
20932226 }
....@@ -2154,17 +2287,14 @@
21542287 * holding the page lock. --sct
21552288 */
21562289
2157
- if (!buffer_jbd(bh))
2290
+ jh = jbd2_journal_grab_journal_head(bh);
2291
+ if (!jh)
21582292 goto zap_buffer_unlocked;
21592293
21602294 /* OK, we have data buffer in journaled mode */
21612295 write_lock(&journal->j_state_lock);
2162
- jbd_lock_bh_state(bh);
2296
+ spin_lock(&jh->b_state_lock);
21632297 spin_lock(&journal->j_list_lock);
2164
-
2165
- jh = jbd2_journal_grab_journal_head(bh);
2166
- if (!jh)
2167
- goto zap_buffer_no_jh;
21682298
21692299 /*
21702300 * We cannot remove the buffer from checkpoint lists until the
....@@ -2244,10 +2374,13 @@
22442374 * for commit and try again.
22452375 */
22462376 if (partial_page) {
2247
- jbd2_journal_put_journal_head(jh);
22482377 spin_unlock(&journal->j_list_lock);
2249
- jbd_unlock_bh_state(bh);
2378
+ spin_unlock(&jh->b_state_lock);
22502379 write_unlock(&journal->j_state_lock);
2380
+ jbd2_journal_put_journal_head(jh);
2381
+ /* Already zapped buffer? Nothing to do... */
2382
+ if (!bh->b_bdev)
2383
+ return 0;
22512384 return -EBUSY;
22522385 }
22532386 /*
....@@ -2261,10 +2394,10 @@
22612394 if (journal->j_running_transaction && buffer_jbddirty(bh))
22622395 jh->b_next_transaction = journal->j_running_transaction;
22632396 jh->b_modified = 0;
2264
- jbd2_journal_put_journal_head(jh);
22652397 spin_unlock(&journal->j_list_lock);
2266
- jbd_unlock_bh_state(bh);
2398
+ spin_unlock(&jh->b_state_lock);
22672399 write_unlock(&journal->j_state_lock);
2400
+ jbd2_journal_put_journal_head(jh);
22682401 return 0;
22692402 } else {
22702403 /* Good, the buffer belongs to the running transaction.
....@@ -2288,11 +2421,10 @@
22882421 * here.
22892422 */
22902423 jh->b_modified = 0;
2291
- jbd2_journal_put_journal_head(jh);
2292
-zap_buffer_no_jh:
22932424 spin_unlock(&journal->j_list_lock);
2294
- jbd_unlock_bh_state(bh);
2425
+ spin_unlock(&jh->b_state_lock);
22952426 write_unlock(&journal->j_state_lock);
2427
+ jbd2_journal_put_journal_head(jh);
22962428 zap_buffer_unlocked:
22972429 clear_buffer_dirty(bh);
22982430 J_ASSERT_BH(bh, !buffer_jbddirty(bh));
....@@ -2306,7 +2438,7 @@
23062438 }
23072439
23082440 /**
2309
- * void jbd2_journal_invalidatepage()
2441
+ * jbd2_journal_invalidatepage()
23102442 * @journal: journal to use for flush...
23112443 * @page: page to flush
23122444 * @offset: start of the range to invalidate
....@@ -2379,7 +2511,7 @@
23792511 int was_dirty = 0;
23802512 struct buffer_head *bh = jh2bh(jh);
23812513
2382
- J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
2514
+ lockdep_assert_held(&jh->b_state_lock);
23832515 assert_spin_locked(&transaction->t_journal->j_list_lock);
23842516
23852517 J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
....@@ -2441,11 +2573,11 @@
24412573 void jbd2_journal_file_buffer(struct journal_head *jh,
24422574 transaction_t *transaction, int jlist)
24432575 {
2444
- jbd_lock_bh_state(jh2bh(jh));
2576
+ spin_lock(&jh->b_state_lock);
24452577 spin_lock(&transaction->t_journal->j_list_lock);
24462578 __jbd2_journal_file_buffer(jh, transaction, jlist);
24472579 spin_unlock(&transaction->t_journal->j_list_lock);
2448
- jbd_unlock_bh_state(jh2bh(jh));
2580
+ spin_unlock(&jh->b_state_lock);
24492581 }
24502582
24512583 /*
....@@ -2455,23 +2587,25 @@
24552587 * buffer on that transaction's metadata list.
24562588 *
24572589 * Called under j_list_lock
2458
- * Called under jbd_lock_bh_state(jh2bh(jh))
2590
+ * Called under jh->b_state_lock
24592591 *
2460
- * jh and bh may be already free when this function returns
2592
+ * When this function returns true, there's no next transaction to refile to
2593
+ * and the caller has to drop jh reference through
2594
+ * jbd2_journal_put_journal_head().
24612595 */
2462
-void __jbd2_journal_refile_buffer(struct journal_head *jh)
2596
+bool __jbd2_journal_refile_buffer(struct journal_head *jh)
24632597 {
24642598 int was_dirty, jlist;
24652599 struct buffer_head *bh = jh2bh(jh);
24662600
2467
- J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
2601
+ lockdep_assert_held(&jh->b_state_lock);
24682602 if (jh->b_transaction)
24692603 assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock);
24702604
24712605 /* If the buffer is now unused, just drop it. */
24722606 if (jh->b_next_transaction == NULL) {
24732607 __jbd2_journal_unfile_buffer(jh);
2474
- return;
2608
+ return true;
24752609 }
24762610
24772611 /*
....@@ -2506,6 +2640,7 @@
25062640
25072641 if (was_dirty)
25082642 set_buffer_jbddirty(bh);
2643
+ return false;
25092644 }
25102645
25112646 /*
....@@ -2516,16 +2651,15 @@
25162651 */
25172652 void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
25182653 {
2519
- struct buffer_head *bh = jh2bh(jh);
2654
+ bool drop;
25202655
2521
- /* Get reference so that buffer cannot be freed before we unlock it */
2522
- get_bh(bh);
2523
- jbd_lock_bh_state(bh);
2656
+ spin_lock(&jh->b_state_lock);
25242657 spin_lock(&journal->j_list_lock);
2525
- __jbd2_journal_refile_buffer(jh);
2526
- jbd_unlock_bh_state(bh);
2658
+ drop = __jbd2_journal_refile_buffer(jh);
2659
+ spin_unlock(&jh->b_state_lock);
25272660 spin_unlock(&journal->j_list_lock);
2528
- __brelse(bh);
2661
+ if (drop)
2662
+ jbd2_journal_put_journal_head(jh);
25292663 }
25302664
25312665 /*
....@@ -2584,18 +2718,6 @@
25842718 spin_unlock(&journal->j_list_lock);
25852719
25862720 return 0;
2587
-}
2588
-
2589
-int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode)
2590
-{
2591
- return jbd2_journal_file_inode(handle, jinode,
2592
- JI_WRITE_DATA | JI_WAIT_DATA, 0, LLONG_MAX);
2593
-}
2594
-
2595
-int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode)
2596
-{
2597
- return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, 0,
2598
- LLONG_MAX);
25992721 }
26002722
26012723 int jbd2_journal_inode_ranged_write(handle_t *handle,