| .. | .. |
|---|
| 10 | 10 | #include <linux/types.h> |
|---|
| 11 | 11 | #include <linux/mutex.h> |
|---|
| 12 | 12 | #include <linux/spinlock.h> |
|---|
| 13 | +#include <linux/rcupdate.h> |
|---|
| 13 | 14 | #include <linux/completion.h> |
|---|
| 14 | 15 | #include <linux/wait.h> |
|---|
| 16 | +#include <linux/zstd.h> |
|---|
| 15 | 17 | #include <crypto/hash.h> |
|---|
| 18 | +#include <linux/rwsem.h> |
|---|
| 16 | 19 | |
|---|
| 17 | 20 | #include <uapi/linux/incrementalfs.h> |
|---|
| 18 | 21 | |
|---|
| 19 | 22 | #include "internal.h" |
|---|
| 23 | +#include "pseudo_files.h" |
|---|
| 20 | 24 | |
|---|
| 21 | 25 | #define SEGMENTS_PER_FILE 3 |
|---|
| 22 | 26 | |
|---|
| 23 | 27 | enum LOG_RECORD_TYPE { |
|---|
| 24 | 28 | FULL, |
|---|
| 25 | 29 | SAME_FILE, |
|---|
| 30 | + SAME_FILE_CLOSE_BLOCK, |
|---|
| 31 | + SAME_FILE_CLOSE_BLOCK_SHORT, |
|---|
| 26 | 32 | SAME_FILE_NEXT_BLOCK, |
|---|
| 27 | 33 | SAME_FILE_NEXT_BLOCK_SHORT, |
|---|
| 28 | 34 | }; |
|---|
| 29 | 35 | |
|---|
| 30 | 36 | struct full_record { |
|---|
| 31 | | - enum LOG_RECORD_TYPE type : 2; /* FULL */ |
|---|
| 32 | | - u32 block_index : 30; |
|---|
| 37 | + enum LOG_RECORD_TYPE type : 3; /* FULL */ |
|---|
| 38 | + u32 block_index : 29; |
|---|
| 33 | 39 | incfs_uuid_t file_id; |
|---|
| 34 | 40 | u64 absolute_ts_us; |
|---|
| 35 | | -} __packed; /* 28 bytes */ |
|---|
| 41 | + uid_t uid; |
|---|
| 42 | +} __packed; /* 32 bytes */ |
|---|
| 36 | 43 | |
|---|
| 37 | | -struct same_file_record { |
|---|
| 38 | | - enum LOG_RECORD_TYPE type : 2; /* SAME_FILE */ |
|---|
| 39 | | - u32 block_index : 30; |
|---|
| 40 | | - u32 relative_ts_us; /* max 2^32 us ~= 1 hour (1:11:30) */ |
|---|
| 41 | | -} __packed; /* 12 bytes */ |
|---|
| 44 | +struct same_file { |
|---|
| 45 | + enum LOG_RECORD_TYPE type : 3; /* SAME_FILE */ |
|---|
| 46 | + u32 block_index : 29; |
|---|
| 47 | + uid_t uid; |
|---|
| 48 | + u16 relative_ts_us; /* max 2^16 us ~= 64 ms */ |
|---|
| 49 | +} __packed; /* 10 bytes */ |
|---|
| 42 | 50 | |
|---|
| 43 | | -struct same_file_next_block { |
|---|
| 44 | | - enum LOG_RECORD_TYPE type : 2; /* SAME_FILE_NEXT_BLOCK */ |
|---|
| 45 | | - u32 relative_ts_us : 30; /* max 2^30 us ~= 15 min (17:50) */ |
|---|
| 51 | +struct same_file_close_block { |
|---|
| 52 | + enum LOG_RECORD_TYPE type : 3; /* SAME_FILE_CLOSE_BLOCK */ |
|---|
| 53 | + u16 relative_ts_us : 13; /* max 2^13 us ~= 8 ms */ |
|---|
| 54 | + s16 block_index_delta; |
|---|
| 46 | 55 | } __packed; /* 4 bytes */ |
|---|
| 47 | 56 | |
|---|
| 48 | | -struct same_file_next_block_short { |
|---|
| 49 | | - enum LOG_RECORD_TYPE type : 2; /* SAME_FILE_NEXT_BLOCK_SHORT */ |
|---|
| 50 | | - u16 relative_ts_us : 14; /* max 2^14 us ~= 16 ms */ |
|---|
| 57 | +struct same_file_close_block_short { |
|---|
| 58 | + enum LOG_RECORD_TYPE type : 3; /* SAME_FILE_CLOSE_BLOCK_SHORT */ |
|---|
| 59 | + u8 relative_ts_tens_us : 5; /* max 2^5*10 us ~= 320 us */ |
|---|
| 60 | + s8 block_index_delta; |
|---|
| 51 | 61 | } __packed; /* 2 bytes */ |
|---|
| 62 | + |
|---|
| 63 | +struct same_file_next_block { |
|---|
| 64 | + enum LOG_RECORD_TYPE type : 3; /* SAME_FILE_NEXT_BLOCK */ |
|---|
| 65 | + u16 relative_ts_us : 13; /* max 2^13 us ~= 8 ms */ |
|---|
| 66 | +} __packed; /* 2 bytes */ |
|---|
| 67 | + |
|---|
| 68 | +struct same_file_next_block_short { |
|---|
| 69 | + enum LOG_RECORD_TYPE type : 3; /* SAME_FILE_NEXT_BLOCK_SHORT */ |
|---|
| 70 | + u8 relative_ts_tens_us : 5; /* max 2^5*10 us ~= 320 us */ |
|---|
| 71 | +} __packed; /* 1 byte */ |
|---|
| 52 | 72 | |
|---|
| 53 | 73 | union log_record { |
|---|
| 54 | 74 | struct full_record full_record; |
|---|
| 55 | | - struct same_file_record same_file_record; |
|---|
| 75 | + struct same_file same_file; |
|---|
| 76 | + struct same_file_close_block same_file_close_block; |
|---|
| 77 | + struct same_file_close_block_short same_file_close_block_short; |
|---|
| 56 | 78 | struct same_file_next_block same_file_next_block; |
|---|
| 57 | 79 | struct same_file_next_block_short same_file_next_block_short; |
|---|
| 58 | 80 | }; |
|---|
| .. | .. |
|---|
| 99 | 121 | unsigned int readahead_pages; |
|---|
| 100 | 122 | unsigned int read_log_pages; |
|---|
| 101 | 123 | unsigned int read_log_wakeup_count; |
|---|
| 102 | | - bool no_backing_file_cache; |
|---|
| 103 | | - bool no_backing_file_readahead; |
|---|
| 124 | + bool report_uid; |
|---|
| 125 | + char *sysfs_name; |
|---|
| 104 | 126 | }; |
|---|
| 105 | 127 | |
|---|
| 106 | 128 | struct mount_info { |
|---|
| .. | .. |
|---|
| 109 | 131 | struct path mi_backing_dir_path; |
|---|
| 110 | 132 | |
|---|
| 111 | 133 | struct dentry *mi_index_dir; |
|---|
| 134 | + /* For stacking mounts: true if the index dir must be freed by this |
|---|
| 135 | + * SB; otherwise it was created by a lower-level SB */ |
|---|
| 136 | + bool mi_index_free; |
|---|
| 137 | + |
|---|
| 138 | + struct dentry *mi_incomplete_dir; |
|---|
| 139 | + /* For stacking mounts: true if the incomplete dir must be freed by |
|---|
| 140 | + * this SB. Similar to mi_index_free */ |
|---|
| 141 | + bool mi_incomplete_free; |
|---|
| 112 | 142 | |
|---|
| 113 | 143 | const struct cred *mi_owner; |
|---|
| 114 | 144 | |
|---|
| .. | .. |
|---|
| 123 | 153 | wait_queue_head_t mi_pending_reads_notif_wq; |
|---|
| 124 | 154 | |
|---|
| 125 | 155 | /* |
|---|
| 126 | | - * Protects: |
|---|
| 156 | + * Protects - RCU safe: |
|---|
| 127 | 157 | * - mi_reads_list_head |
|---|
| 128 | 158 | * - mi_pending_reads_count |
|---|
| 129 | 159 | * - mi_last_pending_read_number |
|---|
| 130 | 160 | * - data_file_segment.reads_list_head |
|---|
| 131 | 161 | */ |
|---|
| 132 | | - struct mutex mi_pending_reads_mutex; |
|---|
| 162 | + spinlock_t pending_read_lock; |
|---|
| 133 | 163 | |
|---|
| 134 | 164 | /* List of active pending_read objects */ |
|---|
| 135 | 165 | struct list_head mi_reads_list_head; |
|---|
| .. | .. |
|---|
| 146 | 176 | /* Temporary buffer for read logger. */ |
|---|
| 147 | 177 | struct read_log mi_log; |
|---|
| 148 | 178 | |
|---|
| 149 | | - void *log_xattr; |
|---|
| 150 | | - size_t log_xattr_size; |
|---|
| 179 | + /* SELinux needs special xattrs on our pseudo files */ |
|---|
| 180 | + struct mem_range pseudo_file_xattr[PSEUDO_FILE_COUNT]; |
|---|
| 151 | 181 | |
|---|
| 152 | | - void *pending_read_xattr; |
|---|
| 153 | | - size_t pending_read_xattr_size; |
|---|
| 182 | + /* A queue of waiters who want to be notified about blocks_written */ |
|---|
| 183 | + wait_queue_head_t mi_blocks_written_notif_wq; |
|---|
| 184 | + |
|---|
| 185 | + /* Number of blocks written since mount */ |
|---|
| 186 | + atomic_t mi_blocks_written; |
|---|
| 187 | + |
|---|
| 188 | + /* Per UID read timeouts */ |
|---|
| 189 | + spinlock_t mi_per_uid_read_timeouts_lock; |
|---|
| 190 | + struct incfs_per_uid_read_timeouts *mi_per_uid_read_timeouts; |
|---|
| 191 | + int mi_per_uid_read_timeouts_size; |
|---|
| 192 | + |
|---|
| 193 | + /* zstd workspace */ |
|---|
| 194 | + struct mutex mi_zstd_workspace_mutex; |
|---|
| 195 | + void *mi_zstd_workspace; |
|---|
| 196 | + ZSTD_DStream *mi_zstd_stream; |
|---|
| 197 | + struct delayed_work mi_zstd_cleanup_work; |
|---|
| 198 | + |
|---|
| 199 | + /* sysfs node */ |
|---|
| 200 | + struct incfs_sysfs_node *mi_sysfs_node; |
|---|
| 201 | + |
|---|
| 202 | + /* Last error information */ |
|---|
| 203 | + struct mutex mi_le_mutex; |
|---|
| 204 | + incfs_uuid_t mi_le_file_id; |
|---|
| 205 | + u64 mi_le_time_us; |
|---|
| 206 | + u32 mi_le_page; |
|---|
| 207 | + u32 mi_le_errno; |
|---|
| 208 | + uid_t mi_le_uid; |
|---|
| 209 | + |
|---|
| 210 | + /* Number of reads timed out */ |
|---|
| 211 | + u32 mi_reads_failed_timed_out; |
|---|
| 212 | + |
|---|
| 213 | + /* Number of reads failed because hash verification failed */ |
|---|
| 214 | + u32 mi_reads_failed_hash_verification; |
|---|
| 215 | + |
|---|
| 216 | + /* Number of reads failed for another reason */ |
|---|
| 217 | + u32 mi_reads_failed_other; |
|---|
| 218 | + |
|---|
| 219 | + /* Number of reads delayed because page had to be fetched */ |
|---|
| 220 | + u32 mi_reads_delayed_pending; |
|---|
| 221 | + |
|---|
| 222 | + /* Total time waiting for pages to be fetched */ |
|---|
| 223 | + u64 mi_reads_delayed_pending_us; |
|---|
| 224 | + |
|---|
| 225 | + /* |
|---|
| 226 | + * Number of reads delayed because of per-uid min_time_us or |
|---|
| 227 | + * min_pending_time_us settings |
|---|
| 228 | + */ |
|---|
| 229 | + u32 mi_reads_delayed_min; |
|---|
| 230 | + |
|---|
| 231 | + /* Total time waiting because of per-uid min_time_us or |
|---|
| 232 | + * min_pending_time_us settings. |
|---|
| 233 | + * |
|---|
| 234 | + * Note that if a read is initially delayed because we have to wait for |
|---|
| 235 | + * the page, then further delayed because of min_pending_time_us |
|---|
| 236 | + * setting, this counter gets incremented by only the further delay |
|---|
| 237 | + * time. |
|---|
| 238 | + */ |
|---|
| 239 | + u64 mi_reads_delayed_min_us; |
|---|
| 154 | 240 | }; |
|---|
| 155 | 241 | |
|---|
| 156 | 242 | struct data_file_block { |
|---|
| .. | .. |
|---|
| 172 | 258 | |
|---|
| 173 | 259 | int serial_number; |
|---|
| 174 | 260 | |
|---|
| 261 | + uid_t uid; |
|---|
| 262 | + |
|---|
| 175 | 263 | struct list_head mi_reads_list; |
|---|
| 176 | 264 | |
|---|
| 177 | 265 | struct list_head segment_reads_list; |
|---|
| 266 | + |
|---|
| 267 | + struct rcu_head rcu; |
|---|
| 178 | 268 | }; |
|---|
| 179 | 269 | |
|---|
| 180 | 270 | struct data_file_segment { |
|---|
| 181 | 271 | wait_queue_head_t new_data_arrival_wq; |
|---|
| 182 | 272 | |
|---|
| 183 | 273 | /* Protects reads and writes from the blockmap */ |
|---|
| 184 | | - /* Good candidate for read/write mutex */ |
|---|
| 185 | | - struct mutex blockmap_mutex; |
|---|
| 274 | + struct rw_semaphore rwsem; |
|---|
| 186 | 275 | |
|---|
| 187 | 276 | /* List of active pending_read objects belonging to this segment */ |
|---|
| 188 | 277 | /* Protected by mount_info.pending_read_lock */ |
|---|
| .. | .. |
|---|
| 232 | 321 | /* Total number of blocks, data + hash */ |
|---|
| 233 | 322 | int df_total_block_count; |
|---|
| 234 | 323 | |
|---|
| 235 | | - struct file_attr n_attr; |
|---|
| 324 | + /* For mapped files, the offset into the actual file */ |
|---|
| 325 | + loff_t df_mapped_offset; |
|---|
| 236 | 326 | |
|---|
| 327 | + /* Number of data blocks written to file */ |
|---|
| 328 | + atomic_t df_data_blocks_written; |
|---|
| 329 | + |
|---|
| 330 | + /* Number of data blocks in the status block */ |
|---|
| 331 | + u32 df_initial_data_blocks_written; |
|---|
| 332 | + |
|---|
| 333 | + /* Number of hash blocks written to file */ |
|---|
| 334 | + atomic_t df_hash_blocks_written; |
|---|
| 335 | + |
|---|
| 336 | + /* Number of hash blocks in the status block */ |
|---|
| 337 | + u32 df_initial_hash_blocks_written; |
|---|
| 338 | + |
|---|
| 339 | + /* Offset to status metadata header */ |
|---|
| 340 | + loff_t df_status_offset; |
|---|
| 341 | + |
|---|
| 342 | + /* |
|---|
| 343 | + * Mutex acquired while enabling verity. Note that df_hash_tree is set |
|---|
| 344 | + * by enable verity. |
|---|
| 345 | + * |
|---|
| 346 | + * The backing file mutex bc_mutex may be taken while this mutex is |
|---|
| 347 | + * held. |
|---|
| 348 | + */ |
|---|
| 349 | + struct mutex df_enable_verity; |
|---|
| 350 | + |
|---|
| 351 | + /* |
|---|
| 352 | + * Set either at construction time or during enabling verity. In the |
|---|
| 353 | + * latter case, set via smp_store_release, so use smp_load_acquire to |
|---|
| 354 | + * read it. |
|---|
| 355 | + */ |
|---|
| 237 | 356 | struct mtree *df_hash_tree; |
|---|
| 238 | 357 | |
|---|
| 358 | + /* Guaranteed set if df_hash_tree is set. */ |
|---|
| 239 | 359 | struct incfs_df_signature *df_signature; |
|---|
| 360 | + |
|---|
| 361 | + /* |
|---|
| 362 | + * The verity file digest, set when verity is enabled and the file has |
|---|
| 363 | + * been opened |
|---|
| 364 | + */ |
|---|
| 365 | + struct mem_range df_verity_file_digest; |
|---|
| 366 | + |
|---|
| 367 | + struct incfs_df_verity_signature *df_verity_signature; |
|---|
| 240 | 368 | }; |
|---|
| 241 | 369 | |
|---|
| 242 | 370 | struct dir_file { |
|---|
| .. | .. |
|---|
| 259 | 387 | struct path backing_path; |
|---|
| 260 | 388 | }; |
|---|
| 261 | 389 | |
|---|
| 390 | +enum FILL_PERMISSION { |
|---|
| 391 | + CANT_FILL = 0, |
|---|
| 392 | + CAN_FILL = 1, |
|---|
| 393 | +}; |
|---|
| 394 | + |
|---|
| 395 | +struct incfs_file_data { |
|---|
| 396 | + /* Does this file handle have INCFS_IOC_FILL_BLOCKS permission */ |
|---|
| 397 | + enum FILL_PERMISSION fd_fill_permission; |
|---|
| 398 | + |
|---|
| 399 | + /* If INCFS_IOC_GET_FILLED_BLOCKS has been called, where are we */ |
|---|
| 400 | + int fd_get_block_pos; |
|---|
| 401 | + |
|---|
| 402 | + /* And how many filled blocks are there up to that point */ |
|---|
| 403 | + int fd_filled_data_blocks; |
|---|
| 404 | + int fd_filled_hash_blocks; |
|---|
| 405 | +}; |
|---|
| 406 | + |
|---|
| 262 | 407 | struct mount_info *incfs_alloc_mount_info(struct super_block *sb, |
|---|
| 263 | 408 | struct mount_options *options, |
|---|
| 264 | 409 | struct path *backing_dir_path); |
|---|
| .. | .. |
|---|
| 268 | 413 | |
|---|
| 269 | 414 | void incfs_free_mount_info(struct mount_info *mi); |
|---|
| 270 | 415 | |
|---|
| 416 | +char *file_id_to_str(incfs_uuid_t id); |
|---|
| 417 | +struct dentry *incfs_lookup_dentry(struct dentry *parent, const char *name); |
|---|
| 271 | 418 | struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf); |
|---|
| 272 | 419 | void incfs_free_data_file(struct data_file *df); |
|---|
| 273 | | - |
|---|
| 274 | | -int incfs_scan_metadata_chain(struct data_file *df); |
|---|
| 275 | 420 | |
|---|
| 276 | 421 | struct dir_file *incfs_open_dir_file(struct mount_info *mi, struct file *bf); |
|---|
| 277 | 422 | void incfs_free_dir_file(struct dir_file *dir); |
|---|
| 278 | 423 | |
|---|
| 424 | +struct incfs_read_data_file_timeouts { |
|---|
| 425 | + u32 min_time_us; |
|---|
| 426 | + u32 min_pending_time_us; |
|---|
| 427 | + u32 max_pending_time_us; |
|---|
| 428 | +}; |
|---|
| 429 | + |
|---|
| 279 | 430 | ssize_t incfs_read_data_file_block(struct mem_range dst, struct file *f, |
|---|
| 280 | | - int index, int timeout_ms, |
|---|
| 281 | | - struct mem_range tmp); |
|---|
| 431 | + int index, struct mem_range tmp, |
|---|
| 432 | + struct incfs_read_data_file_timeouts *timeouts, |
|---|
| 433 | + unsigned int *delayed_min_us); |
|---|
| 434 | + |
|---|
| 435 | +ssize_t incfs_read_merkle_tree_blocks(struct mem_range dst, |
|---|
| 436 | + struct data_file *df, size_t offset); |
|---|
| 282 | 437 | |
|---|
| 283 | 438 | int incfs_get_filled_blocks(struct data_file *df, |
|---|
| 439 | + struct incfs_file_data *fd, |
|---|
| 284 | 440 | struct incfs_get_filled_blocks_args *arg); |
|---|
| 285 | 441 | |
|---|
| 286 | 442 | int incfs_read_file_signature(struct data_file *df, struct mem_range dst); |
|---|
| 287 | 443 | |
|---|
| 288 | 444 | int incfs_process_new_data_block(struct data_file *df, |
|---|
| 289 | | - struct incfs_fill_block *block, u8 *data); |
|---|
| 445 | + struct incfs_fill_block *block, u8 *data, |
|---|
| 446 | + bool *complete); |
|---|
| 290 | 447 | |
|---|
| 291 | 448 | int incfs_process_new_hash_block(struct data_file *df, |
|---|
| 292 | 449 | struct incfs_fill_block *block, u8 *data); |
|---|
| .. | .. |
|---|
| 300 | 457 | */ |
|---|
| 301 | 458 | int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound, |
|---|
| 302 | 459 | struct incfs_pending_read_info *reads, |
|---|
| 303 | | - int reads_size); |
|---|
| 460 | + struct incfs_pending_read_info2 *reads2, |
|---|
| 461 | + int reads_size, int *new_max_sn); |
|---|
| 304 | 462 | |
|---|
| 305 | 463 | int incfs_collect_logged_reads(struct mount_info *mi, |
|---|
| 306 | 464 | struct read_log_state *start_state, |
|---|
| 307 | 465 | struct incfs_pending_read_info *reads, |
|---|
| 466 | + struct incfs_pending_read_info2 *reads2, |
|---|
| 308 | 467 | int reads_size); |
|---|
| 309 | 468 | struct read_log_state incfs_get_log_state(struct mount_info *mi); |
|---|
| 310 | 469 | int incfs_get_uncollected_logs_count(struct mount_info *mi, |
|---|
| .. | .. |
|---|
| 315 | 474 | if (!inode) |
|---|
| 316 | 475 | return NULL; |
|---|
| 317 | 476 | |
|---|
| 318 | | - if (inode->i_sb->s_magic != (long) INCFS_MAGIC_NUMBER) { |
|---|
| 477 | + if (inode->i_sb->s_magic != INCFS_MAGIC_NUMBER) { |
|---|
| 319 | 478 | /* This inode doesn't belong to us. */ |
|---|
| 320 | 479 | pr_warn_once("incfs: %s on an alien inode.", __func__); |
|---|
| 321 | 480 | return NULL; |
|---|
| .. | .. |
|---|
| 388 | 547 | return 0; |
|---|
| 389 | 548 | return 1 + (size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; |
|---|
| 390 | 549 | } |
|---|
| 391 | | - |
|---|
| 392 | | -bool incfs_equal_ranges(struct mem_range lhs, struct mem_range rhs); |
|---|
| 393 | 550 | |
|---|
| 394 | 551 | #endif /* _INCFS_DATA_MGMT_H */ |
|---|