hc
2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/fs/ext4/balloc.c
....@@ -303,6 +303,36 @@
303303 return desc;
304304 }
305305
306
+static ext4_fsblk_t ext4_valid_block_bitmap_padding(struct super_block *sb,
307
+ ext4_group_t block_group,
308
+ struct buffer_head *bh)
309
+{
310
+ ext4_grpblk_t next_zero_bit;
311
+ unsigned long bitmap_size = sb->s_blocksize * 8;
312
+ unsigned int offset = num_clusters_in_group(sb, block_group);
313
+
314
+ if (bitmap_size <= offset)
315
+ return 0;
316
+
317
+ next_zero_bit = ext4_find_next_zero_bit(bh->b_data, bitmap_size, offset);
318
+
319
+ return (next_zero_bit < bitmap_size ? next_zero_bit : 0);
320
+}
321
+
322
+struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
323
+ ext4_group_t group)
324
+{
325
+ struct ext4_group_info **grp_info;
326
+ long indexv, indexh;
327
+
328
+ if (unlikely(group >= EXT4_SB(sb)->s_groups_count))
329
+ return NULL;
330
+ indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
331
+ indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
332
+ grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv);
333
+ return grp_info[indexh];
334
+}
335
+
306336 /*
307337 * Return the block number which was discovered to be invalid, or 0 if
308338 * the block bitmap is valid.
....@@ -368,18 +398,24 @@
368398 struct buffer_head *bh)
369399 {
370400 ext4_fsblk_t blk;
371
- struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
401
+ struct ext4_group_info *grp;
402
+
403
+ if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)
404
+ return 0;
405
+
406
+ grp = ext4_get_group_info(sb, block_group);
372407
373408 if (buffer_verified(bh))
374409 return 0;
375
- if (EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
410
+ if (!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
376411 return -EFSCORRUPTED;
377412
378413 ext4_lock_group(sb, block_group);
379414 if (buffer_verified(bh))
380415 goto verified;
381416 if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
382
- desc, bh))) {
417
+ desc, bh) ||
418
+ ext4_simulate_fail(sb, EXT4_SIM_BBITMAP_CRC))) {
383419 ext4_unlock_group(sb, block_group);
384420 ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
385421 ext4_mark_group_bitmap_corrupted(sb, block_group,
....@@ -393,6 +429,15 @@
393429 block_group, blk);
394430 ext4_mark_group_bitmap_corrupted(sb, block_group,
395431 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
432
+ return -EFSCORRUPTED;
433
+ }
434
+ blk = ext4_valid_block_bitmap_padding(sb, block_group, bh);
435
+ if (unlikely(blk != 0)) {
436
+ ext4_unlock_group(sb, block_group);
437
+ ext4_error(sb, "bg %u: block %llu: padding at end of block bitmap is not set",
438
+ block_group, blk);
439
+ ext4_mark_group_bitmap_corrupted(sb, block_group,
440
+ EXT4_GROUP_INFO_BBITMAP_CORRUPT);
396441 return -EFSCORRUPTED;
397442 }
398443 set_buffer_verified(bh);
....@@ -409,10 +454,11 @@
409454 * Read the bitmap for a given block_group,and validate the
410455 * bits for block/inode/inode tables are set in the bitmaps
411456 *
412
- * Return buffer_head on success or NULL in case of failure.
457
+ * Return buffer_head on success or an ERR_PTR in case of failure.
413458 */
414459 struct buffer_head *
415
-ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
460
+ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group,
461
+ bool ignore_locked)
416462 {
417463 struct ext4_group_desc *desc;
418464 struct ext4_sb_info *sbi = EXT4_SB(sb);
....@@ -438,6 +484,12 @@
438484 "block_group = %u, block_bitmap = %llu",
439485 block_group, bitmap_blk);
440486 return ERR_PTR(-ENOMEM);
487
+ }
488
+
489
+ if (ignore_locked && buffer_locked(bh)) {
490
+ /* buffer under IO already, return if called for prefetching */
491
+ put_bh(bh);
492
+ return NULL;
441493 }
442494
443495 if (bitmap_uptodate(bh))
....@@ -486,10 +538,10 @@
486538 * submit the buffer_head for reading
487539 */
488540 set_buffer_new(bh);
489
- trace_ext4_read_block_bitmap_load(sb, block_group);
490
- bh->b_end_io = ext4_end_bitmap_read;
491
- get_bh(bh);
492
- submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh);
541
+ trace_ext4_read_block_bitmap_load(sb, block_group, ignore_locked);
542
+ ext4_read_bh_nowait(bh, REQ_META | REQ_PRIO |
543
+ (ignore_locked ? REQ_RAHEAD : 0),
544
+ ext4_end_bitmap_read);
493545 return bh;
494546 verify:
495547 err = ext4_validate_block_bitmap(sb, desc, block_group, bh);
....@@ -501,7 +553,7 @@
501553 return ERR_PTR(err);
502554 }
503555
504
-/* Returns 0 on success, 1 on error */
556
+/* Returns 0 on success, -errno on error */
505557 int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group,
506558 struct buffer_head *bh)
507559 {
....@@ -513,10 +565,11 @@
513565 if (!desc)
514566 return -EFSCORRUPTED;
515567 wait_on_buffer(bh);
568
+ ext4_simulate_fail_bh(sb, bh, EXT4_SIM_BBITMAP_EIO);
516569 if (!buffer_uptodate(bh)) {
517
- ext4_error(sb, "Cannot read block bitmap - "
518
- "block_group = %u, block_bitmap = %llu",
519
- block_group, (unsigned long long) bh->b_blocknr);
570
+ ext4_error_err(sb, EIO, "Cannot read block bitmap - "
571
+ "block_group = %u, block_bitmap = %llu",
572
+ block_group, (unsigned long long) bh->b_blocknr);
520573 ext4_mark_group_bitmap_corrupted(sb, block_group,
521574 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
522575 return -EIO;
....@@ -532,7 +585,7 @@
532585 struct buffer_head *bh;
533586 int err;
534587
535
- bh = ext4_read_block_bitmap_nowait(sb, block_group);
588
+ bh = ext4_read_block_bitmap_nowait(sb, block_group, false);
536589 if (IS_ERR(bh))
537590 return bh;
538591 err = ext4_wait_block_bitmap(sb, block_group, bh);
....@@ -611,28 +664,42 @@
611664 }
612665
613666 /**
614
- * ext4_should_retry_alloc()
615
- * @sb: super block
616
- * @retries number of attemps has been made
667
+ * ext4_should_retry_alloc() - check if a block allocation should be retried
668
+ * @sb: superblock
669
+ * @retries: number of retry attempts made so far
617670 *
618
- * ext4_should_retry_alloc() is called when ENOSPC is returned, and if
619
- * it is profitable to retry the operation, this function will wait
620
- * for the current or committing transaction to complete, and then
621
- * return TRUE. We will only retry once.
671
+ * ext4_should_retry_alloc() is called when ENOSPC is returned while
672
+ * attempting to allocate blocks. If there's an indication that a pending
673
+ * journal transaction might free some space and allow another attempt to
674
+ * succeed, this function will wait for the current or committing transaction
675
+ * to complete and then return TRUE.
622676 */
623677 int ext4_should_retry_alloc(struct super_block *sb, int *retries)
624678 {
625
- if (!ext4_has_free_clusters(EXT4_SB(sb), 1, 0) ||
626
- (*retries)++ > 1 ||
627
- !EXT4_SB(sb)->s_journal)
679
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
680
+
681
+ if (!sbi->s_journal)
628682 return 0;
629683
684
+ if (++(*retries) > 3) {
685
+ percpu_counter_inc(&sbi->s_sra_exceeded_retry_limit);
686
+ return 0;
687
+ }
688
+
689
+ /*
690
+ * if there's no indication that blocks are about to be freed it's
691
+ * possible we just missed a transaction commit that did so
692
+ */
630693 smp_mb();
631
- if (EXT4_SB(sb)->s_mb_free_pending == 0)
632
- return 0;
694
+ if (sbi->s_mb_free_pending == 0)
695
+ return ext4_has_free_clusters(sbi, 1, 0);
633696
697
+ /*
698
+ * it's possible we've just missed a transaction commit here,
699
+ * so ignore the returned status
700
+ */
634701 jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
635
- jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal);
702
+ (void) jbd2_journal_force_commit_nested(sbi->s_journal);
636703 return 1;
637704 }
638705
....@@ -836,11 +903,11 @@
836903 }
837904
838905 /*
839
- * This function returns the number of file system metadata clusters at
906
+ * This function returns the number of file system metadata blocks at
840907 * the beginning of a block group, including the reserved gdt blocks.
841908 */
842
-static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
843
- ext4_group_t block_group)
909
+unsigned int ext4_num_base_meta_blocks(struct super_block *sb,
910
+ ext4_group_t block_group)
844911 {
845912 struct ext4_sb_info *sbi = EXT4_SB(sb);
846913 unsigned num;
....@@ -858,8 +925,15 @@
858925 } else { /* For META_BG_BLOCK_GROUPS */
859926 num += ext4_bg_num_gdb(sb, block_group);
860927 }
861
- return EXT4_NUM_B2C(sbi, num);
928
+ return num;
862929 }
930
+
931
+static unsigned int ext4_num_base_meta_clusters(struct super_block *sb,
932
+ ext4_group_t block_group)
933
+{
934
+ return EXT4_NUM_B2C(EXT4_SB(sb), ext4_num_base_meta_blocks(sb, block_group));
935
+}
936
+
863937 /**
864938 * ext4_inode_to_goal_block - return a hint for block allocation
865939 * @inode: inode for block allocation
....@@ -901,10 +975,11 @@
901975 return bg_start;
902976
903977 if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
904
- colour = (current->pid % 16) *
978
+ colour = (task_pid_nr(current) % 16) *
905979 (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
906980 else
907
- colour = (current->pid % 16) * ((last_block - bg_start) / 16);
981
+ colour = (task_pid_nr(current) % 16) *
982
+ ((last_block - bg_start) / 16);
908983 return bg_start + colour;
909984 }
910985