.. | .. |
---|
184 | 184 | /* |
---|
185 | 185 | * write the filemap data using writepage() address_space_operations. |
---|
186 | 186 | * We don't do block allocation here even for delalloc. We don't |
---|
187 | | - * use writepages() because with dealyed allocation we may be doing |
---|
| 187 | + * use writepages() because with delayed allocation we may be doing |
---|
188 | 188 | * block allocation in writepages(). |
---|
189 | 189 | */ |
---|
190 | | -static int journal_submit_inode_data_buffers(struct address_space *mapping, |
---|
191 | | - loff_t dirty_start, loff_t dirty_end) |
---|
| 190 | +int jbd2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode) |
---|
192 | 191 | { |
---|
193 | | - int ret; |
---|
| 192 | + struct address_space *mapping = jinode->i_vfs_inode->i_mapping; |
---|
194 | 193 | struct writeback_control wbc = { |
---|
195 | 194 | .sync_mode = WB_SYNC_ALL, |
---|
196 | 195 | .nr_to_write = mapping->nrpages * 2, |
---|
197 | | - .range_start = dirty_start, |
---|
198 | | - .range_end = dirty_end, |
---|
| 196 | + .range_start = jinode->i_dirty_start, |
---|
| 197 | + .range_end = jinode->i_dirty_end, |
---|
199 | 198 | }; |
---|
200 | 199 | |
---|
201 | | - ret = generic_writepages(mapping, &wbc); |
---|
202 | | - return ret; |
---|
| 200 | + /* |
---|
| 201 | + * submit the inode data buffers. We use writepage |
---|
| 202 | + * instead of writepages. Because writepages can do |
---|
| 203 | + * block allocation with delalloc. We need to write |
---|
| 204 | + * only allocated blocks here. |
---|
| 205 | + */ |
---|
| 206 | + return generic_writepages(mapping, &wbc); |
---|
203 | 207 | } |
---|
| 208 | + |
---|
| 209 | +/* Send all the data buffers related to an inode */ |
---|
| 210 | +int jbd2_submit_inode_data(struct jbd2_inode *jinode) |
---|
| 211 | +{ |
---|
| 212 | + |
---|
| 213 | + if (!jinode || !(jinode->i_flags & JI_WRITE_DATA)) |
---|
| 214 | + return 0; |
---|
| 215 | + |
---|
| 216 | + trace_jbd2_submit_inode_data(jinode->i_vfs_inode); |
---|
| 217 | + return jbd2_journal_submit_inode_data_buffers(jinode); |
---|
| 218 | + |
---|
| 219 | +} |
---|
| 220 | +EXPORT_SYMBOL(jbd2_submit_inode_data); |
---|
| 221 | + |
---|
| 222 | +int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode) |
---|
| 223 | +{ |
---|
| 224 | + if (!jinode || !(jinode->i_flags & JI_WAIT_DATA) || |
---|
| 225 | + !jinode->i_vfs_inode || !jinode->i_vfs_inode->i_mapping) |
---|
| 226 | + return 0; |
---|
| 227 | + return filemap_fdatawait_range_keep_errors( |
---|
| 228 | + jinode->i_vfs_inode->i_mapping, jinode->i_dirty_start, |
---|
| 229 | + jinode->i_dirty_end); |
---|
| 230 | +} |
---|
| 231 | +EXPORT_SYMBOL(jbd2_wait_inode_data); |
---|
204 | 232 | |
---|
205 | 233 | /* |
---|
206 | 234 | * Submit all the data buffers of inode associated with the transaction to |
---|
.. | .. |
---|
215 | 243 | { |
---|
216 | 244 | struct jbd2_inode *jinode; |
---|
217 | 245 | int err, ret = 0; |
---|
218 | | - struct address_space *mapping; |
---|
219 | 246 | |
---|
220 | 247 | spin_lock(&journal->j_list_lock); |
---|
221 | 248 | list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { |
---|
222 | | - loff_t dirty_start = jinode->i_dirty_start; |
---|
223 | | - loff_t dirty_end = jinode->i_dirty_end; |
---|
224 | | - |
---|
225 | 249 | if (!(jinode->i_flags & JI_WRITE_DATA)) |
---|
226 | 250 | continue; |
---|
227 | | - mapping = jinode->i_vfs_inode->i_mapping; |
---|
228 | 251 | jinode->i_flags |= JI_COMMIT_RUNNING; |
---|
229 | 252 | spin_unlock(&journal->j_list_lock); |
---|
230 | | - /* |
---|
231 | | - * submit the inode data buffers. We use writepage |
---|
232 | | - * instead of writepages. Because writepages can do |
---|
233 | | - * block allocation with delalloc. We need to write |
---|
234 | | - * only allocated blocks here. |
---|
235 | | - */ |
---|
| 253 | + /* submit the inode data buffers. */ |
---|
236 | 254 | trace_jbd2_submit_inode_data(jinode->i_vfs_inode); |
---|
237 | | - err = journal_submit_inode_data_buffers(mapping, dirty_start, |
---|
238 | | - dirty_end); |
---|
239 | | - if (!ret) |
---|
240 | | - ret = err; |
---|
| 255 | + if (journal->j_submit_inode_data_buffers) { |
---|
| 256 | + err = journal->j_submit_inode_data_buffers(jinode); |
---|
| 257 | + if (!ret) |
---|
| 258 | + ret = err; |
---|
| 259 | + } |
---|
241 | 260 | spin_lock(&journal->j_list_lock); |
---|
242 | 261 | J_ASSERT(jinode->i_transaction == commit_transaction); |
---|
243 | 262 | jinode->i_flags &= ~JI_COMMIT_RUNNING; |
---|
.. | .. |
---|
246 | 265 | } |
---|
247 | 266 | spin_unlock(&journal->j_list_lock); |
---|
248 | 267 | return ret; |
---|
| 268 | +} |
---|
| 269 | + |
---|
| 270 | +int jbd2_journal_finish_inode_data_buffers(struct jbd2_inode *jinode) |
---|
| 271 | +{ |
---|
| 272 | + struct address_space *mapping = jinode->i_vfs_inode->i_mapping; |
---|
| 273 | + |
---|
| 274 | + return filemap_fdatawait_range_keep_errors(mapping, |
---|
| 275 | + jinode->i_dirty_start, |
---|
| 276 | + jinode->i_dirty_end); |
---|
249 | 277 | } |
---|
250 | 278 | |
---|
251 | 279 | /* |
---|
.. | .. |
---|
262 | 290 | /* For locking, see the comment in journal_submit_data_buffers() */ |
---|
263 | 291 | spin_lock(&journal->j_list_lock); |
---|
264 | 292 | list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { |
---|
265 | | - loff_t dirty_start = jinode->i_dirty_start; |
---|
266 | | - loff_t dirty_end = jinode->i_dirty_end; |
---|
267 | | - |
---|
268 | 293 | if (!(jinode->i_flags & JI_WAIT_DATA)) |
---|
269 | 294 | continue; |
---|
270 | 295 | jinode->i_flags |= JI_COMMIT_RUNNING; |
---|
271 | 296 | spin_unlock(&journal->j_list_lock); |
---|
272 | | - err = filemap_fdatawait_range_keep_errors( |
---|
273 | | - jinode->i_vfs_inode->i_mapping, dirty_start, |
---|
274 | | - dirty_end); |
---|
275 | | - if (!ret) |
---|
276 | | - ret = err; |
---|
| 297 | + /* wait for the inode data buffers writeout. */ |
---|
| 298 | + if (journal->j_finish_inode_data_buffers) { |
---|
| 299 | + err = journal->j_finish_inode_data_buffers(jinode); |
---|
| 300 | + if (!ret) |
---|
| 301 | + ret = err; |
---|
| 302 | + } |
---|
277 | 303 | spin_lock(&journal->j_list_lock); |
---|
278 | 304 | jinode->i_flags &= ~JI_COMMIT_RUNNING; |
---|
279 | 305 | smp_mb(); |
---|
.. | .. |
---|
413 | 439 | J_ASSERT(journal->j_running_transaction != NULL); |
---|
414 | 440 | J_ASSERT(journal->j_committing_transaction == NULL); |
---|
415 | 441 | |
---|
| 442 | + write_lock(&journal->j_state_lock); |
---|
| 443 | + journal->j_flags |= JBD2_FULL_COMMIT_ONGOING; |
---|
| 444 | + while (journal->j_flags & JBD2_FAST_COMMIT_ONGOING) { |
---|
| 445 | + DEFINE_WAIT(wait); |
---|
| 446 | + |
---|
| 447 | + prepare_to_wait(&journal->j_fc_wait, &wait, |
---|
| 448 | + TASK_UNINTERRUPTIBLE); |
---|
| 449 | + write_unlock(&journal->j_state_lock); |
---|
| 450 | + schedule(); |
---|
| 451 | + write_lock(&journal->j_state_lock); |
---|
| 452 | + finish_wait(&journal->j_fc_wait, &wait); |
---|
| 453 | + /* |
---|
| 454 | + * TODO: by blocking fast commits here, we are increasing |
---|
| 455 | + * fsync() latency slightly. Strictly speaking, we don't need |
---|
| 456 | + * to block fast commits until the transaction enters T_FLUSH |
---|
| 457 | + * state. So an optimization is possible where we block new fast |
---|
| 458 | + * commits here and wait for existing ones to complete |
---|
| 459 | + * just before we enter T_FLUSH. That way, the existing fast |
---|
| 460 | + * commits and this full commit can proceed in parallel. |
---|
| 461 | + */ |
---|
| 462 | + } |
---|
| 463 | + write_unlock(&journal->j_state_lock); |
---|
| 464 | + |
---|
416 | 465 | commit_transaction = journal->j_running_transaction; |
---|
417 | 466 | |
---|
418 | 467 | trace_jbd2_start_commit(journal, commit_transaction); |
---|
.. | .. |
---|
420 | 469 | commit_transaction->t_tid); |
---|
421 | 470 | |
---|
422 | 471 | write_lock(&journal->j_state_lock); |
---|
| 472 | + journal->j_fc_off = 0; |
---|
423 | 473 | J_ASSERT(commit_transaction->t_state == T_RUNNING); |
---|
424 | 474 | commit_transaction->t_state = T_LOCKED; |
---|
425 | 475 | |
---|
.. | .. |
---|
450 | 500 | finish_wait(&journal->j_wait_updates, &wait); |
---|
451 | 501 | } |
---|
452 | 502 | spin_unlock(&commit_transaction->t_handle_lock); |
---|
| 503 | + commit_transaction->t_state = T_SWITCH; |
---|
453 | 504 | |
---|
454 | 505 | J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <= |
---|
455 | 506 | journal->j_max_transaction_buffers); |
---|
.. | .. |
---|
469 | 520 | * has reserved. This is consistent with the existing behaviour |
---|
470 | 521 | * that multiple jbd2_journal_get_write_access() calls to the same |
---|
471 | 522 | * buffer are perfectly permissible. |
---|
| 523 | + * We use journal->j_state_lock here to serialize processing of |
---|
| 524 | + * t_reserved_list with eviction of buffers from journal_unmap_buffer(). |
---|
472 | 525 | */ |
---|
473 | 526 | while (commit_transaction->t_reserved_list) { |
---|
474 | 527 | jh = commit_transaction->t_reserved_list; |
---|
.. | .. |
---|
480 | 533 | if (jh->b_committed_data) { |
---|
481 | 534 | struct buffer_head *bh = jh2bh(jh); |
---|
482 | 535 | |
---|
483 | | - jbd_lock_bh_state(bh); |
---|
| 536 | + spin_lock(&jh->b_state_lock); |
---|
484 | 537 | jbd2_free(jh->b_committed_data, bh->b_size); |
---|
485 | 538 | jh->b_committed_data = NULL; |
---|
486 | | - jbd_unlock_bh_state(bh); |
---|
| 539 | + spin_unlock(&jh->b_state_lock); |
---|
487 | 540 | } |
---|
488 | 541 | jbd2_journal_refile_buffer(journal, jh); |
---|
489 | 542 | } |
---|
490 | 543 | |
---|
| 544 | + write_unlock(&journal->j_state_lock); |
---|
491 | 545 | /* |
---|
492 | 546 | * Now try to drop any written-back buffers from the journal's |
---|
493 | 547 | * checkpoint lists. We do this *before* commit because it potentially |
---|
.. | .. |
---|
510 | 564 | */ |
---|
511 | 565 | jbd2_journal_switch_revoke_table(journal); |
---|
512 | 566 | |
---|
| 567 | + write_lock(&journal->j_state_lock); |
---|
513 | 568 | /* |
---|
514 | 569 | * Reserved credits cannot be claimed anymore, free them |
---|
515 | 570 | */ |
---|
.. | .. |
---|
526 | 581 | journal->j_running_transaction = NULL; |
---|
527 | 582 | start_time = ktime_get(); |
---|
528 | 583 | commit_transaction->t_log_start = journal->j_head; |
---|
529 | | - wake_up(&journal->j_wait_transaction_locked); |
---|
| 584 | + wake_up_all(&journal->j_wait_transaction_locked); |
---|
530 | 585 | write_unlock(&journal->j_state_lock); |
---|
531 | 586 | |
---|
532 | 587 | jbd_debug(3, "JBD2: commit phase 2a\n"); |
---|
.. | .. |
---|
557 | 612 | stats.run.rs_logging = jiffies; |
---|
558 | 613 | stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing, |
---|
559 | 614 | stats.run.rs_logging); |
---|
560 | | - stats.run.rs_blocks = |
---|
561 | | - atomic_read(&commit_transaction->t_outstanding_credits); |
---|
| 615 | + stats.run.rs_blocks = commit_transaction->t_nr_buffers; |
---|
562 | 616 | stats.run.rs_blocks_logged = 0; |
---|
563 | 617 | |
---|
564 | 618 | J_ASSERT(commit_transaction->t_nr_buffers <= |
---|
.. | .. |
---|
639 | 693 | |
---|
640 | 694 | /* |
---|
641 | 695 | * start_this_handle() uses t_outstanding_credits to determine |
---|
642 | | - * the free space in the log, but this counter is changed |
---|
643 | | - * by jbd2_journal_next_log_block() also. |
---|
| 696 | + * the free space in the log. |
---|
644 | 697 | */ |
---|
645 | 698 | atomic_dec(&commit_transaction->t_outstanding_credits); |
---|
646 | 699 | |
---|
.. | .. |
---|
759 | 812 | if (first_block < journal->j_tail) |
---|
760 | 813 | freed += journal->j_last - journal->j_first; |
---|
761 | 814 | /* Update tail only if we free significant amount of space */ |
---|
762 | | - if (freed < journal->j_maxlen / 4) |
---|
| 815 | + if (freed < jbd2_journal_get_max_txn_bufs(journal)) |
---|
763 | 816 | update_tail = 0; |
---|
764 | 817 | } |
---|
765 | 818 | J_ASSERT(commit_transaction->t_state == T_COMMIT); |
---|
.. | .. |
---|
774 | 827 | if (commit_transaction->t_need_data_flush && |
---|
775 | 828 | (journal->j_fs_dev != journal->j_dev) && |
---|
776 | 829 | (journal->j_flags & JBD2_BARRIER)) |
---|
777 | | - blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL); |
---|
| 830 | + blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS); |
---|
778 | 831 | |
---|
779 | 832 | /* Done it all: now write the commit record asynchronously. */ |
---|
780 | 833 | if (jbd2_has_feature_async_commit(journal)) { |
---|
.. | .. |
---|
881 | 934 | stats.run.rs_blocks_logged++; |
---|
882 | 935 | if (jbd2_has_feature_async_commit(journal) && |
---|
883 | 936 | journal->j_flags & JBD2_BARRIER) { |
---|
884 | | - blkdev_issue_flush(journal->j_dev, GFP_NOFS, NULL); |
---|
| 937 | + blkdev_issue_flush(journal->j_dev, GFP_NOFS); |
---|
885 | 938 | } |
---|
886 | 939 | |
---|
887 | 940 | if (err) |
---|
888 | 941 | jbd2_journal_abort(journal, err); |
---|
| 942 | + |
---|
| 943 | + WARN_ON_ONCE( |
---|
| 944 | + atomic_read(&commit_transaction->t_outstanding_credits) < 0); |
---|
889 | 945 | |
---|
890 | 946 | /* |
---|
891 | 947 | * Now disk caches for filesystem device are flushed so we are safe to |
---|
.. | .. |
---|
917 | 973 | transaction_t *cp_transaction; |
---|
918 | 974 | struct buffer_head *bh; |
---|
919 | 975 | int try_to_free = 0; |
---|
| 976 | + bool drop_ref; |
---|
920 | 977 | |
---|
921 | 978 | jh = commit_transaction->t_forget; |
---|
922 | 979 | spin_unlock(&journal->j_list_lock); |
---|
.. | .. |
---|
926 | 983 | * done with it. |
---|
927 | 984 | */ |
---|
928 | 985 | get_bh(bh); |
---|
929 | | - jbd_lock_bh_state(bh); |
---|
| 986 | + spin_lock(&jh->b_state_lock); |
---|
930 | 987 | J_ASSERT_JH(jh, jh->b_transaction == commit_transaction); |
---|
931 | 988 | |
---|
932 | 989 | /* |
---|
.. | .. |
---|
1026 | 1083 | try_to_free = 1; |
---|
1027 | 1084 | } |
---|
1028 | 1085 | JBUFFER_TRACE(jh, "refile or unfile buffer"); |
---|
1029 | | - __jbd2_journal_refile_buffer(jh); |
---|
1030 | | - jbd_unlock_bh_state(bh); |
---|
| 1086 | + drop_ref = __jbd2_journal_refile_buffer(jh); |
---|
| 1087 | + spin_unlock(&jh->b_state_lock); |
---|
| 1088 | + if (drop_ref) |
---|
| 1089 | + jbd2_journal_put_journal_head(jh); |
---|
1031 | 1090 | if (try_to_free) |
---|
1032 | 1091 | release_buffer_page(bh); /* Drops bh reference */ |
---|
1033 | 1092 | else |
---|
.. | .. |
---|
1112 | 1171 | |
---|
1113 | 1172 | if (journal->j_commit_callback) |
---|
1114 | 1173 | journal->j_commit_callback(journal, commit_transaction); |
---|
| 1174 | + if (journal->j_fc_cleanup_callback) |
---|
| 1175 | + journal->j_fc_cleanup_callback(journal, 1); |
---|
1115 | 1176 | |
---|
1116 | 1177 | trace_jbd2_end_commit(journal, commit_transaction); |
---|
1117 | 1178 | jbd_debug(1, "JBD2: commit %d complete, head %d\n", |
---|
1118 | 1179 | journal->j_commit_sequence, journal->j_tail_sequence); |
---|
1119 | 1180 | |
---|
1120 | 1181 | write_lock(&journal->j_state_lock); |
---|
| 1182 | + journal->j_flags &= ~JBD2_FULL_COMMIT_ONGOING; |
---|
| 1183 | + journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING; |
---|
1121 | 1184 | spin_lock(&journal->j_list_lock); |
---|
1122 | 1185 | commit_transaction->t_state = T_FINISHED; |
---|
1123 | 1186 | /* Check if the transaction can be dropped now that we are finished */ |
---|
.. | .. |
---|
1129 | 1192 | spin_unlock(&journal->j_list_lock); |
---|
1130 | 1193 | write_unlock(&journal->j_state_lock); |
---|
1131 | 1194 | wake_up(&journal->j_wait_done_commit); |
---|
| 1195 | + wake_up(&journal->j_fc_wait); |
---|
1132 | 1196 | |
---|
1133 | 1197 | /* |
---|
1134 | 1198 | * Calculate overall stats |
---|