hc
2023-12-06 08f87f769b595151be1afeff53e144f543faa614
kernel/fs/incfs/data_mgmt.c
....@@ -3,11 +3,15 @@
33 * Copyright 2019 Google LLC
44 */
55 #include <linux/crc32.h>
6
+#include <linux/delay.h>
67 #include <linux/file.h>
8
+#include <linux/fsverity.h>
79 #include <linux/gfp.h>
10
+#include <linux/kobject.h>
811 #include <linux/ktime.h>
912 #include <linux/lz4.h>
1013 #include <linux/mm.h>
14
+#include <linux/namei.h>
1115 #include <linux/pagemap.h>
1216 #include <linux/slab.h>
1317 #include <linux/types.h>
....@@ -16,6 +20,10 @@
1620 #include "data_mgmt.h"
1721 #include "format.h"
1822 #include "integrity.h"
23
+#include "sysfs.h"
24
+#include "verity.h"
25
+
26
+static int incfs_scan_metadata_chain(struct data_file *df);
1927
2028 static void log_wake_up_all(struct work_struct *work)
2129 {
....@@ -24,12 +32,26 @@
2432 wake_up_all(&rl->ml_notif_wq);
2533 }
2634
35
+static void zstd_free_workspace(struct work_struct *work)
36
+{
37
+ struct delayed_work *dw = container_of(work, struct delayed_work, work);
38
+ struct mount_info *mi =
39
+ container_of(dw, struct mount_info, mi_zstd_cleanup_work);
40
+
41
+ mutex_lock(&mi->mi_zstd_workspace_mutex);
42
+ kvfree(mi->mi_zstd_workspace);
43
+ mi->mi_zstd_workspace = NULL;
44
+ mi->mi_zstd_stream = NULL;
45
+ mutex_unlock(&mi->mi_zstd_workspace_mutex);
46
+}
47
+
2748 struct mount_info *incfs_alloc_mount_info(struct super_block *sb,
2849 struct mount_options *options,
2950 struct path *backing_dir_path)
3051 {
3152 struct mount_info *mi = NULL;
3253 int error = 0;
54
+ struct incfs_sysfs_node *node;
3355
3456 mi = kzalloc(sizeof(*mi), GFP_NOFS);
3557 if (!mi)
....@@ -40,12 +62,25 @@
4062 mi->mi_owner = get_current_cred();
4163 path_get(&mi->mi_backing_dir_path);
4264 mutex_init(&mi->mi_dir_struct_mutex);
43
- mutex_init(&mi->mi_pending_reads_mutex);
4465 init_waitqueue_head(&mi->mi_pending_reads_notif_wq);
4566 init_waitqueue_head(&mi->mi_log.ml_notif_wq);
67
+ init_waitqueue_head(&mi->mi_blocks_written_notif_wq);
68
+ atomic_set(&mi->mi_blocks_written, 0);
4669 INIT_DELAYED_WORK(&mi->mi_log.ml_wakeup_work, log_wake_up_all);
4770 spin_lock_init(&mi->mi_log.rl_lock);
71
+ spin_lock_init(&mi->pending_read_lock);
4872 INIT_LIST_HEAD(&mi->mi_reads_list_head);
73
+ spin_lock_init(&mi->mi_per_uid_read_timeouts_lock);
74
+ mutex_init(&mi->mi_zstd_workspace_mutex);
75
+ INIT_DELAYED_WORK(&mi->mi_zstd_cleanup_work, zstd_free_workspace);
76
+ mutex_init(&mi->mi_le_mutex);
77
+
78
+ node = incfs_add_sysfs_node(options->sysfs_name, mi);
79
+ if (IS_ERR(node)) {
80
+ error = PTR_ERR(node);
81
+ goto err;
82
+ }
83
+ mi->mi_sysfs_node = node;
4984
5085 error = incfs_realloc_mount_info(mi, options);
5186 if (error)
....@@ -95,38 +130,144 @@
95130 kfree(old_buffer);
96131 }
97132
133
+ if (options->sysfs_name && !mi->mi_sysfs_node)
134
+ mi->mi_sysfs_node = incfs_add_sysfs_node(options->sysfs_name,
135
+ mi);
136
+ else if (!options->sysfs_name && mi->mi_sysfs_node) {
137
+ incfs_free_sysfs_node(mi->mi_sysfs_node);
138
+ mi->mi_sysfs_node = NULL;
139
+ } else if (options->sysfs_name &&
140
+ strcmp(options->sysfs_name,
141
+ kobject_name(&mi->mi_sysfs_node->isn_sysfs_node))) {
142
+ incfs_free_sysfs_node(mi->mi_sysfs_node);
143
+ mi->mi_sysfs_node = incfs_add_sysfs_node(options->sysfs_name,
144
+ mi);
145
+ }
146
+
147
+ if (IS_ERR(mi->mi_sysfs_node)) {
148
+ int err = PTR_ERR(mi->mi_sysfs_node);
149
+
150
+ mi->mi_sysfs_node = NULL;
151
+ return err;
152
+ }
153
+
98154 mi->mi_options = *options;
99155 return 0;
100156 }
101157
102158 void incfs_free_mount_info(struct mount_info *mi)
103159 {
160
+ int i;
104161 if (!mi)
105162 return;
106163
107164 flush_delayed_work(&mi->mi_log.ml_wakeup_work);
165
+ flush_delayed_work(&mi->mi_zstd_cleanup_work);
108166
109167 dput(mi->mi_index_dir);
168
+ dput(mi->mi_incomplete_dir);
110169 path_put(&mi->mi_backing_dir_path);
111170 mutex_destroy(&mi->mi_dir_struct_mutex);
112
- mutex_destroy(&mi->mi_pending_reads_mutex);
171
+ mutex_destroy(&mi->mi_zstd_workspace_mutex);
113172 put_cred(mi->mi_owner);
114173 kfree(mi->mi_log.rl_ring_buf);
115
- kfree(mi->log_xattr);
116
- kfree(mi->pending_read_xattr);
174
+ for (i = 0; i < ARRAY_SIZE(mi->pseudo_file_xattr); ++i)
175
+ kfree(mi->pseudo_file_xattr[i].data);
176
+ kfree(mi->mi_per_uid_read_timeouts);
177
+ incfs_free_sysfs_node(mi->mi_sysfs_node);
117178 kfree(mi);
118179 }
119180
120181 static void data_file_segment_init(struct data_file_segment *segment)
121182 {
122183 init_waitqueue_head(&segment->new_data_arrival_wq);
123
- mutex_init(&segment->blockmap_mutex);
184
+ init_rwsem(&segment->rwsem);
124185 INIT_LIST_HEAD(&segment->reads_list_head);
125186 }
126187
127
-static void data_file_segment_destroy(struct data_file_segment *segment)
188
+char *file_id_to_str(incfs_uuid_t id)
128189 {
129
- mutex_destroy(&segment->blockmap_mutex);
190
+ char *result = kmalloc(1 + sizeof(id.bytes) * 2, GFP_NOFS);
191
+ char *end;
192
+
193
+ if (!result)
194
+ return NULL;
195
+
196
+ end = bin2hex(result, id.bytes, sizeof(id.bytes));
197
+ *end = 0;
198
+ return result;
199
+}
200
+
201
+struct dentry *incfs_lookup_dentry(struct dentry *parent, const char *name)
202
+{
203
+ struct inode *inode;
204
+ struct dentry *result = NULL;
205
+
206
+ if (!parent)
207
+ return ERR_PTR(-EFAULT);
208
+
209
+ inode = d_inode(parent);
210
+ inode_lock_nested(inode, I_MUTEX_PARENT);
211
+ result = lookup_one_len(name, parent, strlen(name));
212
+ inode_unlock(inode);
213
+
214
+ if (IS_ERR(result))
215
+ pr_warn("%s err:%ld\n", __func__, PTR_ERR(result));
216
+
217
+ return result;
218
+}
219
+
220
+static struct data_file *handle_mapped_file(struct mount_info *mi,
221
+ struct data_file *df)
222
+{
223
+ char *file_id_str;
224
+ struct dentry *index_file_dentry;
225
+ struct path path;
226
+ struct file *bf;
227
+ struct data_file *result = NULL;
228
+ const struct cred *old_cred;
229
+
230
+ file_id_str = file_id_to_str(df->df_id);
231
+ if (!file_id_str)
232
+ return ERR_PTR(-ENOENT);
233
+
234
+ index_file_dentry = incfs_lookup_dentry(mi->mi_index_dir,
235
+ file_id_str);
236
+ kfree(file_id_str);
237
+ if (!index_file_dentry)
238
+ return ERR_PTR(-ENOENT);
239
+ if (IS_ERR(index_file_dentry))
240
+ return (struct data_file *)index_file_dentry;
241
+ if (!d_really_is_positive(index_file_dentry)) {
242
+ result = ERR_PTR(-ENOENT);
243
+ goto out;
244
+ }
245
+
246
+ path = (struct path) {
247
+ .mnt = mi->mi_backing_dir_path.mnt,
248
+ .dentry = index_file_dentry
249
+ };
250
+
251
+ old_cred = override_creds(mi->mi_owner);
252
+ bf = dentry_open(&path, O_RDWR | O_NOATIME | O_LARGEFILE,
253
+ current_cred());
254
+ revert_creds(old_cred);
255
+
256
+ if (IS_ERR(bf)) {
257
+ result = (struct data_file *)bf;
258
+ goto out;
259
+ }
260
+
261
+ result = incfs_open_data_file(mi, bf);
262
+ fput(bf);
263
+ if (IS_ERR(result))
264
+ goto out;
265
+
266
+ result->df_mapped_offset = df->df_metadata_off;
267
+
268
+out:
269
+ dput(index_file_dentry);
270
+ return result;
130271 }
131272
132273 struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf)
....@@ -154,17 +295,15 @@
154295 goto out;
155296 }
156297
298
+ mutex_init(&df->df_enable_verity);
299
+
157300 df->df_backing_file_context = bfc;
158301 df->df_mount_info = mi;
159302 for (i = 0; i < ARRAY_SIZE(df->df_segments); i++)
160303 data_file_segment_init(&df->df_segments[i]);
161304
162
- error = mutex_lock_interruptible(&bfc->bc_mutex);
163
- if (error)
164
- goto out;
165305 error = incfs_read_file_header(bfc, &df->df_metadata_off, &df->df_id,
166306 &size, &df->df_header_flags);
167
- mutex_unlock(&bfc->bc_mutex);
168307
169308 if (error)
170309 goto out;
....@@ -172,6 +311,13 @@
172311 df->df_size = size;
173312 if (size > 0)
174313 df->df_data_block_count = get_blocks_count_for_size(size);
314
+
315
+ if (df->df_header_flags & INCFS_FILE_MAPPED) {
316
+ struct data_file *mapped_df = handle_mapped_file(mi, df);
317
+
318
+ incfs_free_data_file(df);
319
+ return mapped_df;
320
+ }
175321
176322 md_records = incfs_scan_metadata_chain(df);
177323 if (md_records < 0)
....@@ -190,16 +336,39 @@
190336
191337 void incfs_free_data_file(struct data_file *df)
192338 {
193
- int i;
339
+ u32 data_blocks_written, hash_blocks_written;
194340
195341 if (!df)
196342 return;
197343
344
+ data_blocks_written = atomic_read(&df->df_data_blocks_written);
345
+ hash_blocks_written = atomic_read(&df->df_hash_blocks_written);
346
+
347
+ if (data_blocks_written != df->df_initial_data_blocks_written ||
348
+ hash_blocks_written != df->df_initial_hash_blocks_written) {
349
+ struct backing_file_context *bfc = df->df_backing_file_context;
350
+ int error = -1;
351
+
352
+ if (bfc && !mutex_lock_interruptible(&bfc->bc_mutex)) {
353
+ error = incfs_write_status_to_backing_file(
354
+ df->df_backing_file_context,
355
+ df->df_status_offset,
356
+ data_blocks_written,
357
+ hash_blocks_written);
358
+ mutex_unlock(&bfc->bc_mutex);
359
+ }
360
+
361
+ if (error)
362
+ /* Nothing can be done, just warn */
363
+ pr_warn("incfs: failed to write status to backing file\n");
364
+ }
365
+
198366 incfs_free_mtree(df->df_hash_tree);
199
- for (i = 0; i < ARRAY_SIZE(df->df_segments); i++)
200
- data_file_segment_destroy(&df->df_segments[i]);
201367 incfs_free_bfc(df->df_backing_file_context);
202368 kfree(df->df_signature);
369
+ kfree(df->df_verity_file_digest.data);
370
+ kfree(df->df_verity_signature);
371
+ mutex_destroy(&df->df_enable_verity);
203372 kfree(df);
204373 }
205374
....@@ -252,14 +421,71 @@
252421 kfree(dir);
253422 }
254423
255
-static ssize_t decompress(struct mem_range src, struct mem_range dst)
424
+static ssize_t zstd_decompress_safe(struct mount_info *mi,
425
+ struct mem_range src, struct mem_range dst)
256426 {
257
- int result = LZ4_decompress_safe(src.data, dst.data, src.len, dst.len);
427
+ ssize_t result;
428
+ ZSTD_inBuffer inbuf = {.src = src.data, .size = src.len};
429
+ ZSTD_outBuffer outbuf = {.dst = dst.data, .size = dst.len};
258430
259
- if (result < 0)
260
- return -EBADMSG;
431
+ result = mutex_lock_interruptible(&mi->mi_zstd_workspace_mutex);
432
+ if (result)
433
+ return result;
261434
435
+ if (!mi->mi_zstd_stream) {
436
+ unsigned int workspace_size = ZSTD_DStreamWorkspaceBound(
437
+ INCFS_DATA_FILE_BLOCK_SIZE);
438
+ void *workspace = kvmalloc(workspace_size, GFP_NOFS);
439
+ ZSTD_DStream *stream;
440
+
441
+ if (!workspace) {
442
+ result = -ENOMEM;
443
+ goto out;
444
+ }
445
+
446
+ stream = ZSTD_initDStream(INCFS_DATA_FILE_BLOCK_SIZE, workspace,
447
+ workspace_size);
448
+ if (!stream) {
449
+ kvfree(workspace);
450
+ result = -EIO;
451
+ goto out;
452
+ }
453
+
454
+ mi->mi_zstd_workspace = workspace;
455
+ mi->mi_zstd_stream = stream;
456
+ }
457
+
458
+ result = ZSTD_decompressStream(mi->mi_zstd_stream, &outbuf, &inbuf) ?
459
+ -EBADMSG : outbuf.pos;
460
+
461
+ mod_delayed_work(system_wq, &mi->mi_zstd_cleanup_work,
462
+ msecs_to_jiffies(5000));
463
+
464
+out:
465
+ mutex_unlock(&mi->mi_zstd_workspace_mutex);
262466 return result;
467
+}
468
+
469
+static ssize_t decompress(struct mount_info *mi,
470
+ struct mem_range src, struct mem_range dst, int alg)
471
+{
472
+ int result;
473
+
474
+ switch (alg) {
475
+ case INCFS_BLOCK_COMPRESSED_LZ4:
476
+ result = LZ4_decompress_safe(src.data, dst.data, src.len,
477
+ dst.len);
478
+ if (result < 0)
479
+ return -EBADMSG;
480
+ return result;
481
+
482
+ case INCFS_BLOCK_COMPRESSED_ZSTD:
483
+ return zstd_decompress_safe(mi, src, dst);
484
+
485
+ default:
486
+ WARN_ON(true);
487
+ return -EOPNOTSUPP;
488
+ }
263489 }
264490
265491 static void log_read_one_record(struct read_log *rl, struct read_log_state *rs)
....@@ -276,10 +502,27 @@
276502
277503 case SAME_FILE:
278504 rs->base_record.block_index =
279
- record->same_file_record.block_index;
505
+ record->same_file.block_index;
280506 rs->base_record.absolute_ts_us +=
281
- record->same_file_record.relative_ts_us;
282
- record_size = sizeof(record->same_file_record);
507
+ record->same_file.relative_ts_us;
508
+ rs->base_record.uid = record->same_file.uid;
509
+ record_size = sizeof(record->same_file);
510
+ break;
511
+
512
+ case SAME_FILE_CLOSE_BLOCK:
513
+ rs->base_record.block_index +=
514
+ record->same_file_close_block.block_index_delta;
515
+ rs->base_record.absolute_ts_us +=
516
+ record->same_file_close_block.relative_ts_us;
517
+ record_size = sizeof(record->same_file_close_block);
518
+ break;
519
+
520
+ case SAME_FILE_CLOSE_BLOCK_SHORT:
521
+ rs->base_record.block_index +=
522
+ record->same_file_close_block_short.block_index_delta;
523
+ rs->base_record.absolute_ts_us +=
524
+ record->same_file_close_block_short.relative_ts_tens_us * 10;
525
+ record_size = sizeof(record->same_file_close_block_short);
283526 break;
284527
285528 case SAME_FILE_NEXT_BLOCK:
....@@ -292,7 +535,7 @@
292535 case SAME_FILE_NEXT_BLOCK_SHORT:
293536 ++rs->base_record.block_index;
294537 rs->base_record.absolute_ts_us +=
295
- record->same_file_next_block_short.relative_ts_us;
538
+ record->same_file_next_block_short.relative_ts_tens_us * 10;
296539 record_size = sizeof(record->same_file_next_block_short);
297540 break;
298541 }
....@@ -314,6 +557,11 @@
314557 s64 relative_us;
315558 union log_record record;
316559 size_t record_size;
560
+ uid_t uid = current_uid().val;
561
+ int block_delta;
562
+ bool same_file, same_uid;
563
+ bool next_block, close_block, very_close_block;
564
+ bool close_time, very_close_time, very_very_close_time;
317565
318566 /*
319567 * This may read the old value, but it's OK to delay the logging start
....@@ -334,37 +582,66 @@
334582 tail = &log->rl_tail;
335583 relative_us = now_us - head->base_record.absolute_ts_us;
336584
337
- if (memcmp(id, &head->base_record.file_id, sizeof(incfs_uuid_t)) ||
338
- relative_us >= 1ll << 32) {
339
- record.full_record = (struct full_record){
340
- .type = FULL,
341
- .block_index = block_index,
342
- .file_id = *id,
343
- .absolute_ts_us = now_us,
344
- };
345
- head->base_record.file_id = *id;
346
- record_size = sizeof(struct full_record);
347
- } else if (block_index != head->base_record.block_index + 1 ||
348
- relative_us >= 1 << 30) {
349
- record.same_file_record = (struct same_file_record){
350
- .type = SAME_FILE,
351
- .block_index = block_index,
352
- .relative_ts_us = relative_us,
353
- };
354
- record_size = sizeof(struct same_file_record);
355
- } else if (relative_us >= 1 << 14) {
585
+ same_file = !memcmp(id, &head->base_record.file_id,
586
+ sizeof(incfs_uuid_t));
587
+ same_uid = uid == head->base_record.uid;
588
+
589
+ block_delta = block_index - head->base_record.block_index;
590
+ next_block = block_delta == 1;
591
+ very_close_block = block_delta >= S8_MIN && block_delta <= S8_MAX;
592
+ close_block = block_delta >= S16_MIN && block_delta <= S16_MAX;
593
+
594
+ very_very_close_time = relative_us < (1 << 5) * 10;
595
+ very_close_time = relative_us < (1 << 13);
596
+ close_time = relative_us < (1 << 16);
597
+
598
+ if (same_file && same_uid && next_block && very_very_close_time) {
599
+ record.same_file_next_block_short =
600
+ (struct same_file_next_block_short){
601
+ .type = SAME_FILE_NEXT_BLOCK_SHORT,
602
+ .relative_ts_tens_us = div_s64(relative_us, 10),
603
+ };
604
+ record_size = sizeof(struct same_file_next_block_short);
605
+ } else if (same_file && same_uid && next_block && very_close_time) {
356606 record.same_file_next_block = (struct same_file_next_block){
357607 .type = SAME_FILE_NEXT_BLOCK,
358608 .relative_ts_us = relative_us,
359609 };
360610 record_size = sizeof(struct same_file_next_block);
361
- } else {
362
- record.same_file_next_block_short =
363
- (struct same_file_next_block_short){
364
- .type = SAME_FILE_NEXT_BLOCK_SHORT,
365
- .relative_ts_us = relative_us,
611
+ } else if (same_file && same_uid && very_close_block &&
612
+ very_very_close_time) {
613
+ record.same_file_close_block_short =
614
+ (struct same_file_close_block_short){
615
+ .type = SAME_FILE_CLOSE_BLOCK_SHORT,
616
+ .relative_ts_tens_us = div_s64(relative_us, 10),
617
+ .block_index_delta = block_delta,
366618 };
367
- record_size = sizeof(struct same_file_next_block_short);
619
+ record_size = sizeof(struct same_file_close_block_short);
620
+ } else if (same_file && same_uid && close_block && very_close_time) {
621
+ record.same_file_close_block = (struct same_file_close_block){
622
+ .type = SAME_FILE_CLOSE_BLOCK,
623
+ .relative_ts_us = relative_us,
624
+ .block_index_delta = block_delta,
625
+ };
626
+ record_size = sizeof(struct same_file_close_block);
627
+ } else if (same_file && close_time) {
628
+ record.same_file = (struct same_file){
629
+ .type = SAME_FILE,
630
+ .block_index = block_index,
631
+ .relative_ts_us = relative_us,
632
+ .uid = uid,
633
+ };
634
+ record_size = sizeof(struct same_file);
635
+ } else {
636
+ record.full_record = (struct full_record){
637
+ .type = FULL,
638
+ .block_index = block_index,
639
+ .file_id = *id,
640
+ .absolute_ts_us = now_us,
641
+ .uid = uid,
642
+ };
643
+ head->base_record.file_id = *id;
644
+ record_size = sizeof(struct full_record);
368645 }
369646
370647 head->base_record.block_index = block_index;
....@@ -405,7 +682,11 @@
405682 int hash_per_block;
406683 pgoff_t file_pages;
407684
408
- tree = df->df_hash_tree;
685
+ /*
686
+ * Memory barrier to make sure tree is fully present if added via enable
687
+ * verity
688
+ */
689
+ tree = smp_load_acquire(&df->df_hash_tree);
409690 sig = df->df_signature;
410691 if (!tree || !sig)
411692 return 0;
....@@ -462,7 +743,7 @@
462743 int i;
463744 bool zero = true;
464745
465
- pr_debug("incfs: Hash mismatch lvl:%d blk:%d\n",
746
+ pr_warn("incfs: Hash mismatch lvl:%d blk:%d\n",
466747 lvl, block_index);
467748 for (i = 0; i < digest_size; i++)
468749 if (stored_digest[i]) {
....@@ -471,7 +752,7 @@
471752 }
472753
473754 if (zero)
474
- pr_debug("incfs: Note saved_digest all zero - did you forget to load the hashes?\n");
755
+ pr_debug("Note saved_digest all zero - did you forget to load the hashes?\n");
475756 return -EBADMSG;
476757 }
477758
....@@ -496,7 +777,7 @@
496777 return res;
497778
498779 if (memcmp(stored_digest, calculated_digest, digest_size)) {
499
- pr_debug("incfs: Leaf hash mismatch blk:%d\n", block_index);
780
+ pr_debug("Leaf hash mismatch blk:%d\n", block_index);
500781 return -EBADMSG;
501782 }
502783
....@@ -528,9 +809,7 @@
528809 res_block->db_backing_file_data_offset |=
529810 le32_to_cpu(bme->me_data_offset_lo);
530811 res_block->db_stored_size = le16_to_cpu(bme->me_data_size);
531
- res_block->db_comp_alg = (flags & INCFS_BLOCK_COMPRESSED_LZ4) ?
532
- COMPRESSION_LZ4 :
533
- COMPRESSION_NONE;
812
+ res_block->db_comp_alg = flags & INCFS_BLOCK_COMPRESSED_MASK;
534813 }
535814
536815 static int get_data_file_block(struct data_file *df, int index,
....@@ -580,36 +859,9 @@
580859 return 0;
581860 }
582861
583
-static int update_file_header_flags(struct data_file *df, u32 bits_to_reset,
584
- u32 bits_to_set)
585
-{
586
- int result;
587
- u32 new_flags;
588
- struct backing_file_context *bfc;
589
-
590
- if (!df)
591
- return -EFAULT;
592
- bfc = df->df_backing_file_context;
593
- if (!bfc)
594
- return -EFAULT;
595
-
596
- result = mutex_lock_interruptible(&bfc->bc_mutex);
597
- if (result)
598
- return result;
599
-
600
- new_flags = (df->df_header_flags & ~bits_to_reset) | bits_to_set;
601
- if (new_flags != df->df_header_flags) {
602
- df->df_header_flags = new_flags;
603
- result = incfs_write_file_header_flags(bfc, new_flags);
604
- }
605
-
606
- mutex_unlock(&bfc->bc_mutex);
607
-
608
- return result;
609
-}
610
-
611862 #define READ_BLOCKMAP_ENTRIES 512
612863 int incfs_get_filled_blocks(struct data_file *df,
864
+ struct incfs_file_data *fd,
613865 struct incfs_get_filled_blocks_args *arg)
614866 {
615867 int error = 0;
....@@ -623,6 +875,8 @@
623875 int i = READ_BLOCKMAP_ENTRIES - 1;
624876 int entries_read = 0;
625877 struct incfs_blockmap_entry *bme;
878
+ int data_blocks_filled = 0;
879
+ int hash_blocks_filled = 0;
626880
627881 *size_out = 0;
628882 if (end_index > df->df_total_block_count)
....@@ -630,7 +884,8 @@
630884 arg->total_blocks_out = df->df_total_block_count;
631885 arg->data_blocks_out = df->df_data_block_count;
632886
633
- if (df->df_header_flags & INCFS_FILE_COMPLETE) {
887
+ if (atomic_read(&df->df_data_blocks_written) ==
888
+ df->df_data_block_count) {
634889 pr_debug("File marked full, fast get_filled_blocks");
635890 if (arg->start_index > end_index) {
636891 arg->index_out = arg->start_index;
....@@ -683,6 +938,13 @@
683938
684939 convert_data_file_block(bme + i, &dfb);
685940
941
+ if (is_data_block_present(&dfb)) {
942
+ if (arg->index_out >= df->df_data_block_count)
943
+ ++hash_blocks_filled;
944
+ else
945
+ ++data_blocks_filled;
946
+ }
947
+
686948 if (is_data_block_present(&dfb) == in_range)
687949 continue;
688950
....@@ -712,13 +974,28 @@
712974 arg->index_out = range.begin;
713975 }
714976
715
- if (!error && in_range && arg->start_index == 0 &&
716
- end_index == df->df_total_block_count &&
717
- *size_out == sizeof(struct incfs_filled_range)) {
718
- int result =
719
- update_file_header_flags(df, 0, INCFS_FILE_COMPLETE);
720
- /* Log failure only, since it's just a failed optimization */
721
- pr_debug("Marked file full with result %d", result);
977
+ if (arg->start_index == 0) {
978
+ fd->fd_get_block_pos = 0;
979
+ fd->fd_filled_data_blocks = 0;
980
+ fd->fd_filled_hash_blocks = 0;
981
+ }
982
+
983
+ if (arg->start_index == fd->fd_get_block_pos) {
984
+ fd->fd_get_block_pos = arg->index_out + 1;
985
+ fd->fd_filled_data_blocks += data_blocks_filled;
986
+ fd->fd_filled_hash_blocks += hash_blocks_filled;
987
+ }
988
+
989
+ if (fd->fd_get_block_pos == df->df_total_block_count + 1) {
990
+ if (fd->fd_filled_data_blocks >
991
+ atomic_read(&df->df_data_blocks_written))
992
+ atomic_set(&df->df_data_blocks_written,
993
+ fd->fd_filled_data_blocks);
994
+
995
+ if (fd->fd_filled_hash_blocks >
996
+ atomic_read(&df->df_hash_blocks_written))
997
+ atomic_set(&df->df_hash_blocks_written,
998
+ fd->fd_filled_hash_blocks);
722999 }
7231000
7241001 kfree(bme);
....@@ -756,18 +1033,29 @@
7561033 result->file_id = df->df_id;
7571034 result->block_index = block_index;
7581035 result->timestamp_us = ktime_to_us(ktime_get());
1036
+ result->uid = current_uid().val;
7591037
760
- mutex_lock(&mi->mi_pending_reads_mutex);
1038
+ spin_lock(&mi->pending_read_lock);
7611039
7621040 result->serial_number = ++mi->mi_last_pending_read_number;
7631041 mi->mi_pending_reads_count++;
7641042
765
- list_add(&result->mi_reads_list, &mi->mi_reads_list_head);
766
- list_add(&result->segment_reads_list, &segment->reads_list_head);
767
- mutex_unlock(&mi->mi_pending_reads_mutex);
1043
+ list_add_rcu(&result->mi_reads_list, &mi->mi_reads_list_head);
1044
+ list_add_rcu(&result->segment_reads_list, &segment->reads_list_head);
1045
+
1046
+ spin_unlock(&mi->pending_read_lock);
7681047
7691048 wake_up_all(&mi->mi_pending_reads_notif_wq);
7701049 return result;
1050
+}
1051
+
1052
+static void free_pending_read_entry(struct rcu_head *entry)
1053
+{
1054
+ struct pending_read *read;
1055
+
1056
+ read = container_of(entry, struct pending_read, rcu);
1057
+
1058
+ kfree(read);
7711059 }
7721060
7731061 /* Notifies a given data file that pending read is completed. */
....@@ -783,14 +1071,17 @@
7831071
7841072 mi = df->df_mount_info;
7851073
786
- mutex_lock(&mi->mi_pending_reads_mutex);
787
- list_del(&read->mi_reads_list);
788
- list_del(&read->segment_reads_list);
1074
+ spin_lock(&mi->pending_read_lock);
1075
+
1076
+ list_del_rcu(&read->mi_reads_list);
1077
+ list_del_rcu(&read->segment_reads_list);
7891078
7901079 mi->mi_pending_reads_count--;
791
- mutex_unlock(&mi->mi_pending_reads_mutex);
7921080
793
- kfree(read);
1081
+ spin_unlock(&mi->pending_read_lock);
1082
+
1083
+ /* Don't free. Wait for readers */
1084
+ call_rcu(&read->rcu, free_pending_read_entry);
7941085 }
7951086
7961087 static void notify_pending_reads(struct mount_info *mi,
....@@ -800,26 +1091,47 @@
8001091 struct pending_read *entry = NULL;
8011092
8021093 /* Notify pending reads waiting for this block. */
803
- mutex_lock(&mi->mi_pending_reads_mutex);
804
- list_for_each_entry(entry, &segment->reads_list_head,
1094
+ rcu_read_lock();
1095
+ list_for_each_entry_rcu(entry, &segment->reads_list_head,
8051096 segment_reads_list) {
8061097 if (entry->block_index == index)
8071098 set_read_done(entry);
8081099 }
809
- mutex_unlock(&mi->mi_pending_reads_mutex);
1100
+ rcu_read_unlock();
8101101 wake_up_all(&segment->new_data_arrival_wq);
1102
+
1103
+ atomic_inc(&mi->mi_blocks_written);
1104
+ wake_up_all(&mi->mi_blocks_written_notif_wq);
1105
+}
1106
+
1107
+static int usleep_interruptible(u32 us)
1108
+{
1109
+ /* See:
1110
+ * https://www.kernel.org/doc/Documentation/timers/timers-howto.txt
1111
+ * for explanation
1112
+ */
1113
+ if (us < 10) {
1114
+ udelay(us);
1115
+ return 0;
1116
+ } else if (us < 20000) {
1117
+ usleep_range(us, us + us / 10);
1118
+ return 0;
1119
+ } else
1120
+ return msleep_interruptible(us / 1000);
8111121 }
8121122
8131123 static int wait_for_data_block(struct data_file *df, int block_index,
814
- int timeout_ms,
815
- struct data_file_block *res_block)
1124
+ struct data_file_block *res_block,
1125
+ struct incfs_read_data_file_timeouts *timeouts)
8161126 {
8171127 struct data_file_block block = {};
8181128 struct data_file_segment *segment = NULL;
8191129 struct pending_read *read = NULL;
8201130 struct mount_info *mi = NULL;
821
- int error = 0;
1131
+ int error;
8221132 int wait_res = 0;
1133
+ unsigned int delayed_pending_us = 0, delayed_min_us = 0;
1134
+ bool delayed_pending = false;
8231135
8241136 if (!df || !res_block)
8251137 return -EFAULT;
....@@ -827,50 +1139,59 @@
8271139 if (block_index < 0 || block_index >= df->df_data_block_count)
8281140 return -EINVAL;
8291141
830
- if (df->df_blockmap_off <= 0)
1142
+ if (df->df_blockmap_off <= 0 || !df->df_mount_info)
8311143 return -ENODATA;
8321144
1145
+ mi = df->df_mount_info;
8331146 segment = get_file_segment(df, block_index);
834
- error = mutex_lock_interruptible(&segment->blockmap_mutex);
1147
+
1148
+ error = down_read_killable(&segment->rwsem);
8351149 if (error)
8361150 return error;
8371151
8381152 /* Look up the given block */
8391153 error = get_data_file_block(df, block_index, &block);
8401154
841
- /* If it's not found, create a pending read */
842
- if (!error && !is_data_block_present(&block) && timeout_ms != 0)
843
- read = add_pending_read(df, block_index);
1155
+ up_read(&segment->rwsem);
8441156
845
- mutex_unlock(&segment->blockmap_mutex);
8461157 if (error)
8471158 return error;
8481159
8491160 /* If the block was found, just return it. No need to wait. */
8501161 if (is_data_block_present(&block)) {
8511162 *res_block = block;
1163
+ if (timeouts && timeouts->min_time_us) {
1164
+ delayed_min_us = timeouts->min_time_us;
1165
+ error = usleep_interruptible(delayed_min_us);
1166
+ goto out;
1167
+ }
8521168 return 0;
1169
+ } else {
1170
+ /* If it's not found, create a pending read */
1171
+ if (timeouts && timeouts->max_pending_time_us) {
1172
+ read = add_pending_read(df, block_index);
1173
+ if (!read)
1174
+ return -ENOMEM;
1175
+ } else {
1176
+ log_block_read(mi, &df->df_id, block_index);
1177
+ return -ETIME;
1178
+ }
8531179 }
8541180
855
- mi = df->df_mount_info;
856
-
857
- if (timeout_ms == 0) {
858
- log_block_read(mi, &df->df_id, block_index);
859
- return -ETIME;
1181
+ /* Rest of function only applies if timeouts != NULL */
1182
+ if (!timeouts) {
1183
+ pr_warn("incfs: timeouts unexpectedly NULL\n");
1184
+ return -EFSCORRUPTED;
8601185 }
861
-
862
- if (!read)
863
- return -ENOMEM;
8641186
8651187 /* Wait for notifications about block's arrival */
8661188 wait_res =
8671189 wait_event_interruptible_timeout(segment->new_data_arrival_wq,
868
- (is_read_done(read)),
869
- msecs_to_jiffies(timeout_ms));
1190
+ (is_read_done(read)),
1191
+ usecs_to_jiffies(timeouts->max_pending_time_us));
8701192
8711193 /* Woke up, the pending read is no longer needed. */
8721194 remove_pending_read(df, read);
873
- read = NULL;
8741195
8751196 if (wait_res == 0) {
8761197 /* Wait has timed out */
....@@ -885,12 +1206,23 @@
8851206 return wait_res;
8861207 }
8871208
888
- error = mutex_lock_interruptible(&segment->blockmap_mutex);
1209
+ delayed_pending = true;
1210
+ delayed_pending_us = timeouts->max_pending_time_us -
1211
+ jiffies_to_usecs(wait_res);
1212
+ if (timeouts->min_pending_time_us > delayed_pending_us) {
1213
+ delayed_min_us = timeouts->min_pending_time_us -
1214
+ delayed_pending_us;
1215
+ error = usleep_interruptible(delayed_min_us);
1216
+ if (error)
1217
+ return error;
1218
+ }
1219
+
1220
+ error = down_read_killable(&segment->rwsem);
8891221 if (error)
8901222 return error;
8911223
8921224 /*
893
- * Re-read block's info now, it has just arrived and
1225
+ * Re-read block's info now, it has just arrived and
8941226 * should be available.
8951227 */
8961228 error = get_data_file_block(df, block_index, &block);
....@@ -899,21 +1231,58 @@
8991231 *res_block = block;
9001232 else {
9011233 /*
902
- * Somehow wait finished successfully bug block still
1234
+ * Somehow wait finished successfully but block still
9031235 * can't be found. It's not normal.
9041236 */
905
- pr_warn("incfs:Wait succeeded, but block not found.\n");
1237
+ pr_warn("incfs: Wait succeeded but block not found.\n");
9061238 error = -ENODATA;
9071239 }
9081240 }
1241
+ up_read(&segment->rwsem);
9091242
910
- mutex_unlock(&segment->blockmap_mutex);
911
- return error;
1243
+out:
1244
+ if (error)
1245
+ return error;
1246
+
1247
+ if (delayed_pending) {
1248
+ mi->mi_reads_delayed_pending++;
1249
+ mi->mi_reads_delayed_pending_us +=
1250
+ delayed_pending_us;
1251
+ }
1252
+
1253
+ if (delayed_min_us) {
1254
+ mi->mi_reads_delayed_min++;
1255
+ mi->mi_reads_delayed_min_us += delayed_min_us;
1256
+ }
1257
+
1258
+ return 0;
1259
+}
1260
+
1261
+static int incfs_update_sysfs_error(struct file *file, int index, int result,
1262
+ struct mount_info *mi, struct data_file *df)
1263
+{
1264
+ int error;
1265
+
1266
+ if (result >= 0)
1267
+ return 0;
1268
+
1269
+ error = mutex_lock_interruptible(&mi->mi_le_mutex);
1270
+ if (error)
1271
+ return error;
1272
+
1273
+ mi->mi_le_file_id = df->df_id;
1274
+ mi->mi_le_time_us = ktime_to_us(ktime_get());
1275
+ mi->mi_le_page = index;
1276
+ mi->mi_le_errno = result;
1277
+ mi->mi_le_uid = current_uid().val;
1278
+ mutex_unlock(&mi->mi_le_mutex);
1279
+
1280
+ return 0;
9121281 }
9131282
9141283 ssize_t incfs_read_data_file_block(struct mem_range dst, struct file *f,
915
- int index, int timeout_ms,
916
- struct mem_range tmp)
1284
+ int index, struct mem_range tmp,
1285
+ struct incfs_read_data_file_timeouts *timeouts)
9171286 {
9181287 loff_t pos;
9191288 ssize_t result;
....@@ -923,7 +1292,7 @@
9231292 struct data_file_block block = {};
9241293 struct data_file *df = get_incfs_data_file(f);
9251294
926
- if (!dst.data || !df)
1295
+ if (!dst.data || !df || !tmp.data)
9271296 return -EFAULT;
9281297
9291298 if (tmp.len < 2 * INCFS_DATA_FILE_BLOCK_SIZE)
....@@ -932,7 +1301,7 @@
9321301 mi = df->df_mount_info;
9331302 bfc = df->df_backing_file_context;
9341303
935
- result = wait_for_data_block(df, index, timeout_ms, &block);
1304
+ result = wait_for_data_block(df, index, &block, timeouts);
9361305 if (result < 0)
9371306 goto out;
9381307
....@@ -949,7 +1318,8 @@
9491318 result = incfs_kread(bfc, tmp.data, bytes_to_read, pos);
9501319 if (result == bytes_to_read) {
9511320 result =
952
- decompress(range(tmp.data, bytes_to_read), dst);
1321
+ decompress(mi, range(tmp.data, bytes_to_read),
1322
+ dst, block.db_comp_alg);
9531323 if (result < 0) {
9541324 const char *name =
9551325 bfc->bc_file->f_path.dentry->d_name.name;
....@@ -974,7 +1344,38 @@
9741344 log_block_read(mi, &df->df_id, index);
9751345
9761346 out:
1347
+ if (result == -ETIME)
1348
+ mi->mi_reads_failed_timed_out++;
1349
+ else if (result == -EBADMSG)
1350
+ mi->mi_reads_failed_hash_verification++;
1351
+ else if (result < 0)
1352
+ mi->mi_reads_failed_other++;
1353
+
1354
+ incfs_update_sysfs_error(f, index, result, mi, df);
1355
+
9771356 return result;
1357
+}
1358
+
1359
+ssize_t incfs_read_merkle_tree_blocks(struct mem_range dst,
1360
+ struct data_file *df, size_t offset)
1361
+{
1362
+ struct backing_file_context *bfc = NULL;
1363
+ struct incfs_df_signature *sig = NULL;
1364
+ size_t to_read = dst.len;
1365
+
1366
+ if (!dst.data || !df)
1367
+ return -EFAULT;
1368
+
1369
+ sig = df->df_signature;
1370
+ bfc = df->df_backing_file_context;
1371
+
1372
+ if (offset > sig->hash_size)
1373
+ return -ERANGE;
1374
+
1375
+ if (offset + to_read > sig->hash_size)
1376
+ to_read = sig->hash_size - offset;
1377
+
1378
+ return incfs_kread(bfc, dst.data, to_read, sig->hash_offset + offset);
9781379 }
9791380
9801381 int incfs_process_new_data_block(struct data_file *df,
....@@ -999,20 +1400,32 @@
9991400 segment = get_file_segment(df, block->block_index);
10001401 if (!segment)
10011402 return -EFAULT;
1403
+
10021404 if (block->compression == COMPRESSION_LZ4)
10031405 flags |= INCFS_BLOCK_COMPRESSED_LZ4;
1406
+ else if (block->compression == COMPRESSION_ZSTD)
1407
+ flags |= INCFS_BLOCK_COMPRESSED_ZSTD;
1408
+ else if (block->compression)
1409
+ return -EINVAL;
10041410
1005
- error = mutex_lock_interruptible(&segment->blockmap_mutex);
1411
+ error = down_read_killable(&segment->rwsem);
10061412 if (error)
10071413 return error;
10081414
10091415 error = get_data_file_block(df, block->block_index, &existing_block);
1416
+
1417
+ up_read(&segment->rwsem);
1418
+
10101419 if (error)
1011
- goto unlock;
1420
+ return error;
10121421 if (is_data_block_present(&existing_block)) {
10131422 /* Block is already present, nothing to do here */
1014
- goto unlock;
1423
+ return 0;
10151424 }
1425
+
1426
+ error = down_write_killable(&segment->rwsem);
1427
+ if (error)
1428
+ return error;
10161429
10171430 error = mutex_lock_interruptible(&bfc->bc_mutex);
10181431 if (!error) {
....@@ -1021,11 +1434,13 @@
10211434 df->df_blockmap_off, flags);
10221435 mutex_unlock(&bfc->bc_mutex);
10231436 }
1024
- if (!error)
1437
+ if (!error) {
10251438 notify_pending_reads(mi, segment, block->block_index);
1439
+ atomic_inc(&df->df_data_blocks_written);
1440
+ }
10261441
1027
-unlock:
1028
- mutex_unlock(&segment->blockmap_mutex);
1442
+ up_write(&segment->rwsem);
1443
+
10291444 if (error)
10301445 pr_debug("%d error: %d\n", block->block_index, error);
10311446 return error;
....@@ -1101,6 +1516,9 @@
11011516 hash_area_base, df->df_blockmap_off, df->df_size);
11021517 mutex_unlock(&bfc->bc_mutex);
11031518 }
1519
+ if (!error)
1520
+ atomic_inc(&df->df_hash_blocks_written);
1521
+
11041522 return error;
11051523 }
11061524
....@@ -1121,25 +1539,6 @@
11211539 df->df_total_block_count = block_count;
11221540 df->df_blockmap_off = base_off;
11231541 return error;
1124
-}
1125
-
1126
-static int process_file_attr_md(struct incfs_file_attr *fa,
1127
- struct metadata_handler *handler)
1128
-{
1129
- struct data_file *df = handler->context;
1130
- u16 attr_size = le16_to_cpu(fa->fa_size);
1131
-
1132
- if (!df)
1133
- return -EFAULT;
1134
-
1135
- if (attr_size > INCFS_MAX_FILE_ATTR_SIZE)
1136
- return -E2BIG;
1137
-
1138
- df->n_attr.fa_value_offset = le64_to_cpu(fa->fa_offset);
1139
- df->n_attr.fa_value_size = attr_size;
1140
- df->n_attr.fa_crc = le32_to_cpu(fa->fa_crc);
1141
-
1142
- return 0;
11431542 }
11441543
11451544 static int process_file_signature_md(struct incfs_file_signature *sg,
....@@ -1217,13 +1616,58 @@
12171616 return error;
12181617 }
12191618
1220
-int incfs_scan_metadata_chain(struct data_file *df)
1619
+static int process_status_md(struct incfs_status *is,
1620
+ struct metadata_handler *handler)
1621
+{
1622
+ struct data_file *df = handler->context;
1623
+
1624
+ df->df_initial_data_blocks_written =
1625
+ le32_to_cpu(is->is_data_blocks_written);
1626
+ atomic_set(&df->df_data_blocks_written,
1627
+ df->df_initial_data_blocks_written);
1628
+
1629
+ df->df_initial_hash_blocks_written =
1630
+ le32_to_cpu(is->is_hash_blocks_written);
1631
+ atomic_set(&df->df_hash_blocks_written,
1632
+ df->df_initial_hash_blocks_written);
1633
+
1634
+ df->df_status_offset = handler->md_record_offset;
1635
+ return 0;
1636
+}
1637
+
1638
+static int process_file_verity_signature_md(
1639
+ struct incfs_file_verity_signature *vs,
1640
+ struct metadata_handler *handler)
1641
+{
1642
+ struct data_file *df = handler->context;
1643
+ struct incfs_df_verity_signature *verity_signature;
1644
+
1645
+ if (!df)
1646
+ return -EFAULT;
1647
+
1648
+ verity_signature = kzalloc(sizeof(*verity_signature), GFP_NOFS);
1649
+ if (!verity_signature)
1650
+ return -ENOMEM;
1651
+
1652
+ verity_signature->offset = le64_to_cpu(vs->vs_offset);
1653
+ verity_signature->size = le32_to_cpu(vs->vs_size);
1654
+ if (verity_signature->size > FS_VERITY_MAX_SIGNATURE_SIZE) {
1655
+ kfree(verity_signature);
1656
+ return -EFAULT;
1657
+ }
1658
+
1659
+ df->df_verity_signature = verity_signature;
1660
+ return 0;
1661
+}
1662
+
1663
+static int incfs_scan_metadata_chain(struct data_file *df)
12211664 {
12221665 struct metadata_handler *handler = NULL;
12231666 int result = 0;
12241667 int records_count = 0;
12251668 int error = 0;
12261669 struct backing_file_context *bfc = NULL;
1670
+ int nondata_block_count;
12271671
12281672 if (!df || !df->df_backing_file_context)
12291673 return -EFAULT;
....@@ -1234,20 +1678,13 @@
12341678 if (!handler)
12351679 return -ENOMEM;
12361680
1237
- /* No writing to the backing file while it's being scanned. */
1238
- error = mutex_lock_interruptible(&bfc->bc_mutex);
1239
- if (error)
1240
- goto out;
1241
-
1242
- /* Reading superblock */
12431681 handler->md_record_offset = df->df_metadata_off;
12441682 handler->context = df;
12451683 handler->handle_blockmap = process_blockmap_md;
1246
- handler->handle_file_attr = process_file_attr_md;
12471684 handler->handle_signature = process_file_signature_md;
1685
+ handler->handle_status = process_status_md;
1686
+ handler->handle_verity_signature = process_file_verity_signature_md;
12481687
1249
- pr_debug("incfs: Starting reading incfs-metadata records at offset %lld\n",
1250
- handler->md_record_offset);
12511688 while (handler->md_record_offset > 0) {
12521689 error = incfs_read_next_metadata_record(bfc, handler);
12531690 if (error) {
....@@ -1259,27 +1696,32 @@
12591696 records_count++;
12601697 }
12611698 if (error) {
1262
- pr_debug("incfs: Error %d after reading %d incfs-metadata records.\n",
1699
+ pr_warn("incfs: Error %d after reading %d incfs-metadata records.\n",
12631700 -error, records_count);
12641701 result = error;
1265
- } else {
1266
- pr_debug("incfs: Finished reading %d incfs-metadata records.\n",
1267
- records_count);
1702
+ } else
12681703 result = records_count;
1269
- }
1270
- mutex_unlock(&bfc->bc_mutex);
12711704
1705
+ nondata_block_count = df->df_total_block_count -
1706
+ df->df_data_block_count;
12721707 if (df->df_hash_tree) {
12731708 int hash_block_count = get_blocks_count_for_size(
12741709 df->df_hash_tree->hash_tree_area_size);
12751710
1276
- if (df->df_data_block_count + hash_block_count !=
1277
- df->df_total_block_count)
1711
+ /*
1712
+ * Files that were created with a hash tree have the hash tree
1713
+ * included in the block map, i.e. nondata_block_count ==
1714
+ * hash_block_count. Files whose hash tree was added by
1715
+ * FS_IOC_ENABLE_VERITY will still have the original block
1716
+ * count, i.e. nondata_block_count == 0.
1717
+ */
1718
+ if (nondata_block_count != hash_block_count &&
1719
+ nondata_block_count != 0)
12781720 result = -EINVAL;
1279
- } else if (df->df_data_block_count != df->df_total_block_count)
1721
+ } else if (nondata_block_count != 0) {
12801722 result = -EINVAL;
1723
+ }
12811724
1282
-out:
12831725 kfree(handler);
12841726 return result;
12851727 }
....@@ -1292,16 +1734,17 @@
12921734 {
12931735 bool result = false;
12941736
1295
- mutex_lock(&mi->mi_pending_reads_mutex);
1737
+ spin_lock(&mi->pending_read_lock);
12961738 result = (mi->mi_last_pending_read_number > last_number) &&
1297
- (mi->mi_pending_reads_count > 0);
1298
- mutex_unlock(&mi->mi_pending_reads_mutex);
1739
+ (mi->mi_pending_reads_count > 0);
1740
+ spin_unlock(&mi->pending_read_lock);
12991741 return result;
13001742 }
13011743
13021744 int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound,
13031745 struct incfs_pending_read_info *reads,
1304
- int reads_size)
1746
+ struct incfs_pending_read_info2 *reads2,
1747
+ int reads_size, int *new_max_sn)
13051748 {
13061749 int reported_reads = 0;
13071750 struct pending_read *entry = NULL;
....@@ -1312,29 +1755,43 @@
13121755 if (reads_size <= 0)
13131756 return 0;
13141757
1315
- mutex_lock(&mi->mi_pending_reads_mutex);
1758
+ if (!incfs_fresh_pending_reads_exist(mi, sn_lowerbound))
1759
+ return 0;
13161760
1317
- if (mi->mi_last_pending_read_number <= sn_lowerbound
1318
- || mi->mi_pending_reads_count == 0)
1319
- goto unlock;
1761
+ rcu_read_lock();
13201762
1321
- list_for_each_entry(entry, &mi->mi_reads_list_head, mi_reads_list) {
1763
+ list_for_each_entry_rcu(entry, &mi->mi_reads_list_head, mi_reads_list) {
13221764 if (entry->serial_number <= sn_lowerbound)
13231765 continue;
13241766
1325
- reads[reported_reads].file_id = entry->file_id;
1326
- reads[reported_reads].block_index = entry->block_index;
1327
- reads[reported_reads].serial_number = entry->serial_number;
1328
- reads[reported_reads].timestamp_us = entry->timestamp_us;
1329
- /* reads[reported_reads].kind = INCFS_READ_KIND_PENDING; */
1767
+ if (reads) {
1768
+ reads[reported_reads].file_id = entry->file_id;
1769
+ reads[reported_reads].block_index = entry->block_index;
1770
+ reads[reported_reads].serial_number =
1771
+ entry->serial_number;
1772
+ reads[reported_reads].timestamp_us =
1773
+ entry->timestamp_us;
1774
+ }
1775
+
1776
+ if (reads2) {
1777
+ reads2[reported_reads].file_id = entry->file_id;
1778
+ reads2[reported_reads].block_index = entry->block_index;
1779
+ reads2[reported_reads].serial_number =
1780
+ entry->serial_number;
1781
+ reads2[reported_reads].timestamp_us =
1782
+ entry->timestamp_us;
1783
+ reads2[reported_reads].uid = entry->uid;
1784
+ }
1785
+
1786
+ if (entry->serial_number > *new_max_sn)
1787
+ *new_max_sn = entry->serial_number;
13301788
13311789 reported_reads++;
13321790 if (reported_reads >= reads_size)
13331791 break;
13341792 }
13351793
1336
-unlock:
1337
- mutex_unlock(&mi->mi_pending_reads_mutex);
1794
+ rcu_read_unlock();
13381795
13391796 return reported_reads;
13401797 }
....@@ -1370,8 +1827,9 @@
13701827 }
13711828
13721829 int incfs_collect_logged_reads(struct mount_info *mi,
1373
- struct read_log_state *reader_state,
1830
+ struct read_log_state *state,
13741831 struct incfs_pending_read_info *reads,
1832
+ struct incfs_pending_read_info2 *reads2,
13751833 int reads_size)
13761834 {
13771835 int dst_idx;
....@@ -1382,45 +1840,51 @@
13821840 head = &log->rl_head;
13831841 tail = &log->rl_tail;
13841842
1385
- if (reader_state->generation_id != head->generation_id) {
1843
+ if (state->generation_id != head->generation_id) {
13861844 pr_debug("read ptr is wrong generation: %u/%u",
1387
- reader_state->generation_id, head->generation_id);
1845
+ state->generation_id, head->generation_id);
13881846
1389
- *reader_state = (struct read_log_state){
1847
+ *state = (struct read_log_state){
13901848 .generation_id = head->generation_id,
13911849 };
13921850 }
13931851
1394
- if (reader_state->current_record_no < tail->current_record_no) {
1852
+ if (state->current_record_no < tail->current_record_no) {
13951853 pr_debug("read ptr is behind, moving: %u/%u -> %u/%u\n",
1396
- (u32)reader_state->next_offset,
1397
- (u32)reader_state->current_pass_no,
1854
+ (u32)state->next_offset,
1855
+ (u32)state->current_pass_no,
13981856 (u32)tail->next_offset, (u32)tail->current_pass_no);
13991857
1400
- *reader_state = *tail;
1858
+ *state = *tail;
14011859 }
14021860
14031861 for (dst_idx = 0; dst_idx < reads_size; dst_idx++) {
1404
- if (reader_state->current_record_no == head->current_record_no)
1862
+ if (state->current_record_no == head->current_record_no)
14051863 break;
14061864
1407
- log_read_one_record(log, reader_state);
1865
+ log_read_one_record(log, state);
14081866
1409
- reads[dst_idx] = (struct incfs_pending_read_info){
1410
- .file_id = reader_state->base_record.file_id,
1411
- .block_index = reader_state->base_record.block_index,
1412
- .serial_number = reader_state->current_record_no,
1413
- .timestamp_us = reader_state->base_record.absolute_ts_us
1414
- };
1867
+ if (reads)
1868
+ reads[dst_idx] = (struct incfs_pending_read_info) {
1869
+ .file_id = state->base_record.file_id,
1870
+ .block_index = state->base_record.block_index,
1871
+ .serial_number = state->current_record_no,
1872
+ .timestamp_us =
1873
+ state->base_record.absolute_ts_us,
1874
+ };
1875
+
1876
+ if (reads2)
1877
+ reads2[dst_idx] = (struct incfs_pending_read_info2) {
1878
+ .file_id = state->base_record.file_id,
1879
+ .block_index = state->base_record.block_index,
1880
+ .serial_number = state->current_record_no,
1881
+ .timestamp_us =
1882
+ state->base_record.absolute_ts_us,
1883
+ .uid = state->base_record.uid,
1884
+ };
14151885 }
14161886
14171887 spin_unlock(&log->rl_lock);
14181888 return dst_idx;
14191889 }
14201890
1421
-bool incfs_equal_ranges(struct mem_range lhs, struct mem_range rhs)
1422
-{
1423
- if (lhs.len != rhs.len)
1424
- return false;
1425
- return memcmp(lhs.data, rhs.data, lhs.len) == 0;
1426
-}