 ..  ..
      1  +// SPDX-License-Identifier: GPL-2.0-only
  1   2   /*
  2   3    * mm/readahead.c - address_space-level file readahead.
  3   4    *
 ..  ..
 21  22   #include <linux/mm_inline.h>
 22  23   #include <linux/blk-cgroup.h>
 23  24   #include <linux/fadvise.h>
     25  +#include <linux/sched/mm.h>
     26  +#include <trace/hooks/mm.h>
 24  27
 25  28   #include "internal.h"
     29  +
     30  +#if defined(CONFIG_ARCH_ROCKCHIP) && defined(CONFIG_NO_GKI)
     31  +#include <linux/fscrypt.h>
     32  +#endif
 26  33
 27  34   /*
 28  35    * Initialise a struct file's readahead state. Assumes that the caller has
 ..  ..
 81  88    * @data: private data for the callback routine.
 82  89    *
 83  90    * Hides the details of the LRU cache etc from the filesystems.
     91  + *
     92  + * Returns: %0 on success, error return by @filler otherwise
 84  93    */
 85  94   int read_cache_pages(struct address_space *mapping, struct list_head *pages,
 86  95                   int (*filler)(void *, struct page *), void *data)
 ..  ..
110 119
111 120   EXPORT_SYMBOL(read_cache_pages);
112 121
113      -static int read_pages(struct address_space *mapping, struct file *filp,
114      -                struct list_head *pages, unsigned int nr_pages, gfp_t gfp)
    122  +gfp_t readahead_gfp_mask(struct address_space *x)
115 123   {
    124  +        gfp_t mask = mapping_gfp_mask(x) | __GFP_NORETRY | __GFP_NOWARN;
    125  +
    126  +        trace_android_rvh_set_readahead_gfp_mask(&mask);
    127  +        return mask;
    128  +}
    129  +EXPORT_SYMBOL_GPL(readahead_gfp_mask);
    130  +
    131  +static void read_pages(struct readahead_control *rac, struct list_head *pages,
    132  +                bool skip_page)
    133  +{
    134  +        const struct address_space_operations *aops = rac->mapping->a_ops;
    135  +        struct page *page;
116 136          struct blk_plug plug;
117      -        unsigned page_idx;
118      -        int ret;
    137  +
    138  +        if (!readahead_count(rac))
    139  +                goto out;
119 140
120 141          blk_start_plug(&plug);
121 142
122      -        if (mapping->a_ops->readpages) {
123      -                ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
    143  +        if (aops->readahead) {
    144  +                aops->readahead(rac);
    145  +                /* Clean up the remaining pages */
    146  +                while ((page = readahead_page(rac))) {
    147  +                        unlock_page(page);
    148  +                        put_page(page);
    149  +                }
    150  +        } else if (aops->readpages) {
    151  +                aops->readpages(rac->file, rac->mapping, pages,
    152  +                                readahead_count(rac));
124 153                  /* Clean up the remaining pages */
125 154                  put_pages_list(pages);
126      -                goto out;
    155  +                rac->_index += rac->_nr_pages;
    156  +                rac->_nr_pages = 0;
    157  +        } else {
    158  +                while ((page = readahead_page(rac))) {
    159  +                        aops->readpage(rac->file, page);
    160  +                        put_page(page);
    161  +                }
127 162          }
128 163
129      -        for (page_idx = 0; page_idx < nr_pages; page_idx++) {
130      -                struct page *page = lru_to_page(pages);
131      -                list_del(&page->lru);
132      -                if (!add_to_page_cache_lru(page, mapping, page->index, gfp))
133      -                        mapping->a_ops->readpage(filp, page);
134      -                put_page(page);
135      -        }
136      -        ret = 0;
137      -
138      -out:
139 164          blk_finish_plug(&plug);
140 165
141      -        return ret;
    166  +        BUG_ON(!list_empty(pages));
    167  +        BUG_ON(readahead_count(rac));
    168  +
    169  +out:
    170  +        if (skip_page)
    171  +                rac->_index++;
142 172   }
143 173
144      -/*
145      - * __do_page_cache_readahead() actually reads a chunk of disk. It allocates
146      - * the pages first, then submits them for I/O. This avoids the very bad
147      - * behaviour which would occur if page allocations are causing VM writeback.
148      - * We really don't want to intermingle reads and writes like that.
    174  +/**
    175  + * page_cache_ra_unbounded - Start unchecked readahead.
    176  + * @ractl: Readahead control.
    177  + * @nr_to_read: The number of pages to read.
    178  + * @lookahead_size: Where to start the next readahead.
149 179    *
150      - * Returns the number of pages requested, or the maximum amount of I/O allowed.
    180  + * This function is for filesystems to call when they want to start
    181  + * readahead beyond a file's stated i_size. This is almost certainly
    182  + * not the function you want to call. Use page_cache_async_readahead()
    183  + * or page_cache_sync_readahead() instead.
    184  + *
    185  + * Context: File is referenced by caller. Mutexes may be held by caller.
    186  + * May sleep, but will not reenter filesystem to reclaim memory.
151 187    */
152      -unsigned int __do_page_cache_readahead(struct address_space *mapping,
153      -                struct file *filp, pgoff_t offset, unsigned long nr_to_read,
154      -                unsigned long lookahead_size)
    188  +void page_cache_ra_unbounded(struct readahead_control *ractl,
    189  +                unsigned long nr_to_read, unsigned long lookahead_size)
155 190   {
156      -        struct inode *inode = mapping->host;
157      -        struct page *page;
158      -        unsigned long end_index;        /* The last page we want to read */
    191  +        struct address_space *mapping = ractl->mapping;
    192  +        unsigned long index = readahead_index(ractl);
159 193          LIST_HEAD(page_pool);
160      -        int page_idx;
161      -        unsigned int nr_pages = 0;
162      -        loff_t isize = i_size_read(inode);
163 194          gfp_t gfp_mask = readahead_gfp_mask(mapping);
    195  +        unsigned long i;
164 196
165      -        if (isize == 0)
166      -                goto out;
167      -
168      -        end_index = ((isize - 1) >> PAGE_SHIFT);
    197  +        /*
    198  +         * Partway through the readahead operation, we will have added
    199  +         * locked pages to the page cache, but will not yet have submitted
    200  +         * them for I/O. Adding another page may need to allocate memory,
    201  +         * which can trigger memory reclaim. Telling the VM we're in
    202  +         * the middle of a filesystem operation will cause it to not
    203  +         * touch file-backed pages, preventing a deadlock. Most (all?)
    204  +         * filesystems already specify __GFP_NOFS in their mapping's
    205  +         * gfp_mask, but let's be explicit here.
    206  +         */
    207  +        unsigned int nofs = memalloc_nofs_save();
169 208
170 209          /*
171 210           * Preallocate as many pages as we will need.
172 211           */
173      -        for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
174      -                pgoff_t page_offset = offset + page_idx;
    212  +        for (i = 0; i < nr_to_read; i++) {
    213  +                struct page *page = xa_load(&mapping->i_pages, index + i);
175 214
176      -                if (page_offset > end_index)
177      -                        break;
    215  +                BUG_ON(index + i != ractl->_index + ractl->_nr_pages);
178 216
179      -                rcu_read_lock();
180      -                page = radix_tree_lookup(&mapping->i_pages, page_offset);
181      -                rcu_read_unlock();
182      -                if (page && !radix_tree_exceptional_entry(page)) {
    217  +                if (page && !xa_is_value(page)) {
183 218                          /*
184      -                         * Page already present? Kick off the current batch of
185      -                         * contiguous pages before continuing with the next
186      -                         * batch.
    219  +                         * Page already present? Kick off the current batch
    220  +                         * of contiguous pages before continuing with the
    221  +                         * next batch. This page may be the one we would
    222  +                         * have intended to mark as Readahead, but we don't
    223  +                         * have a stable reference to this page, and it's
    224  +                         * not worth getting one just for that.
187 225                           */
188      -                        if (nr_pages)
189      -                                read_pages(mapping, filp, &page_pool, nr_pages,
190      -                                                gfp_mask);
191      -                        nr_pages = 0;
    226  +                        read_pages(ractl, &page_pool, true);
192 227                          continue;
193 228                  }
194 229
195 230                  page = __page_cache_alloc(gfp_mask);
196 231                  if (!page)
197 232                          break;
198      -                page->index = page_offset;
199      -                list_add(&page->lru, &page_pool);
200      -                if (page_idx == nr_to_read - lookahead_size)
    233  +                if (mapping->a_ops->readpages) {
    234  +                        page->index = index + i;
    235  +                        list_add(&page->lru, &page_pool);
    236  +                } else if (add_to_page_cache_lru(page, mapping, index + i,
    237  +                                        gfp_mask) < 0) {
    238  +                        put_page(page);
    239  +                        read_pages(ractl, &page_pool, true);
    240  +                        continue;
    241  +                }
    242  +                if (i == nr_to_read - lookahead_size)
201 243                          SetPageReadahead(page);
202      -                nr_pages++;
    244  +                ractl->_nr_pages++;
203 245          }
204 246
205 247          /*
 ..  ..
207 249           * uptodate then the caller will launch readpage again, and
208 250           * will then handle the error.
209 251           */
210      -        if (nr_pages)
211      -                read_pages(mapping, filp, &page_pool, nr_pages, gfp_mask);
212      -        BUG_ON(!list_empty(&page_pool));
213      -out:
214      -        return nr_pages;
    252  +        read_pages(ractl, &page_pool, false);
    253  +        memalloc_nofs_restore(nofs);
    254  +}
    255  +EXPORT_SYMBOL_GPL(page_cache_ra_unbounded);
    256  +
    257  +/*
    258  + * do_page_cache_ra() actually reads a chunk of disk. It allocates
    259  + * the pages first, then submits them for I/O. This avoids the very bad
    260  + * behaviour which would occur if page allocations are causing VM writeback.
    261  + * We really don't want to intermingle reads and writes like that.
    262  + */
    263  +void do_page_cache_ra(struct readahead_control *ractl,
    264  +                unsigned long nr_to_read, unsigned long lookahead_size)
    265  +{
    266  +        struct inode *inode = ractl->mapping->host;
    267  +        unsigned long index = readahead_index(ractl);
    268  +        loff_t isize = i_size_read(inode);
    269  +        pgoff_t end_index;        /* The last page we want to read */
    270  +
    271  +        if (isize == 0)
    272  +                return;
    273  +
    274  +        end_index = (isize - 1) >> PAGE_SHIFT;
    275  +        if (index > end_index)
    276  +                return;
    277  +        /* Don't read past the page containing the last byte of the file */
    278  +        if (nr_to_read > end_index - index)
    279  +                nr_to_read = end_index - index + 1;
    280  +
    281  +        page_cache_ra_unbounded(ractl, nr_to_read, lookahead_size);
215 282   }
216 283
217 284   /*
218 285    * Chunk the readahead into 2 megabyte units, so that we don't pin too much
219 286    * memory at once.
220 287    */
221      -int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
222      -                pgoff_t offset, unsigned long nr_to_read)
    288  +void force_page_cache_ra(struct readahead_control *ractl,
    289  +                struct file_ra_state *ra, unsigned long nr_to_read)
223 290   {
    291  +        struct address_space *mapping = ractl->mapping;
224 292          struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
225      -        struct file_ra_state *ra = &filp->f_ra;
226      -        unsigned long max_pages;
    293  +        unsigned long max_pages, index;
    294  +#if defined(CONFIG_ARCH_ROCKCHIP) && defined(CONFIG_NO_GKI)
    295  +        bool force_lookahead = false;
    296  +#endif
227 297
228      -        if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages))
229      -                return -EINVAL;
    298  +        if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages &&
    299  +                     !mapping->a_ops->readahead))
    300  +                return;
230 301
231 302          /*
232 303           * If the request exceeds the readahead window, allow the read to
233 304           * be up to the optimal hardware IO size
234 305           */
    306  +        index = readahead_index(ractl);
235 307          max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages);
236      -        nr_to_read = min(nr_to_read, max_pages);
    308  +#if defined(CONFIG_ARCH_ROCKCHIP) && defined(CONFIG_NO_GKI)
    309  +        /* For files with fscrypt enabled, to allow IO and the encryption
    310  +         * or decryption process to ping-pong, lookahead is forcibly enabled.
    311  +         */
    312  +        if (nr_to_read > max_pages && fscrypt_inode_uses_fs_layer_crypto(mapping->host))
    313  +                force_lookahead = true;
    314  +#endif
    315  +        nr_to_read = min_t(unsigned long, nr_to_read, max_pages);
237 316          while (nr_to_read) {
238 317                  unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_SIZE;
239 318
240 319                  if (this_chunk > nr_to_read)
241 320                          this_chunk = nr_to_read;
242      -                __do_page_cache_readahead(mapping, filp, offset, this_chunk, 0);
    321  +                ractl->_index = index;
    322  +#if defined(CONFIG_ARCH_ROCKCHIP) && defined(CONFIG_NO_GKI)
    323  +                if (force_lookahead)
    324  +                        do_page_cache_ra(ractl, this_chunk, this_chunk / 2);
    325  +                else
    326  +                        do_page_cache_ra(ractl, this_chunk, 0);
    327  +#else
    328  +                do_page_cache_ra(ractl, this_chunk, 0);
    329  +#endif
243 330
244      -                offset += this_chunk;
    331  +                index += this_chunk;
245 332                  nr_to_read -= this_chunk;
246 333          }
247      -        return 0;
248 334   }
249 335
250 336   /*
 ..  ..
272 358    * return it as the new window size.
273 359    */
274 360   static unsigned long get_next_ra_size(struct file_ra_state *ra,
275      -                                                unsigned long max)
    361  +                                      unsigned long max)
276 362   {
277 363          unsigned long cur = ra->size;
278      -        unsigned long newsize;
279 364
280 365          if (cur < max / 16)
281      -                newsize = 4 * cur;
282      -        else
283      -                newsize = 2 * cur;
284      -
285      -        return min(newsize, max);
    366  +                return 4 * cur;
    367  +        if (cur <= max / 2)
    368  +                return 2 * cur;
    369  +        return max;
286 370   }
287 371
288 372   /*
 ..  ..
325 409    */
326 410
327 411   /*
328      - * Count contiguously cached pages from @offset-1 to @offset-@max,
    412  + * Count contiguously cached pages from @index-1 to @index-@max,
329 413    * this count is a conservative estimation of
330 414    *      - length of the sequential read sequence, or
331 415    *      - thrashing threshold in memory tight systems
332 416    */
333 417   static pgoff_t count_history_pages(struct address_space *mapping,
334      -                                   pgoff_t offset, unsigned long max)
    418  +                                   pgoff_t index, unsigned long max)
335 419   {
336 420          pgoff_t head;
337 421
338 422          rcu_read_lock();
339      -        head = page_cache_prev_hole(mapping, offset - 1, max);
    423  +        head = page_cache_prev_miss(mapping, index - 1, max);
340 424          rcu_read_unlock();
341 425
342      -        return offset - 1 - head;
    426  +        return index - 1 - head;
343 427   }
344 428
345 429   /*
 ..  ..
347 431    */
348 432   static int try_context_readahead(struct address_space *mapping,
349 433                                    struct file_ra_state *ra,
350      -                                 pgoff_t offset,
    434  +                                 pgoff_t index,
351 435                                    unsigned long req_size,
352 436                                    unsigned long max)
353 437   {
354 438          pgoff_t size;
355 439
356      -        size = count_history_pages(mapping, offset, max);
    440  +        size = count_history_pages(mapping, index, max);
357 441
358 442          /*
359 443           * not enough history pages:
 ..  ..
366 450           * starts from beginning of file:
367 451           * it is a strong indication of long-run stream (or whole-file-read)
368 452           */
369      -        if (size >= offset)
    453  +        if (size >= index)
370 454                  size *= 2;
371 455
372      -        ra->start = offset;
    456  +        ra->start = index;
373 457          ra->size = min(size + req_size, max);
374 458          ra->async_size = 1;
375 459
 ..  ..
379 463   /*
380 464    * A minimal readahead algorithm for trivial sequential/random reads.
381 465    */
382      -static unsigned long
383      -ondemand_readahead(struct address_space *mapping,
384      -                   struct file_ra_state *ra, struct file *filp,
385      -                   bool hit_readahead_marker, pgoff_t offset,
386      -                   unsigned long req_size)
    466  +static void ondemand_readahead(struct readahead_control *ractl,
    467  +                struct file_ra_state *ra, bool hit_readahead_marker,
    468  +                unsigned long req_size)
387 469   {
388      -        struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
    470  +        struct backing_dev_info *bdi = inode_to_bdi(ractl->mapping->host);
389 471          unsigned long max_pages = ra->ra_pages;
390 472          unsigned long add_pages;
391      -        pgoff_t prev_offset;
    473  +        unsigned long index = readahead_index(ractl);
    474  +        pgoff_t prev_index;
392 475
393 476          /*
394 477           * If the request exceeds the readahead window, allow the read to
 ..  ..
397 480          if (req_size > max_pages && bdi->io_pages > max_pages)
398 481                  max_pages = min(req_size, bdi->io_pages);
399 482
    483  +        trace_android_vh_ra_tuning_max_page(ractl, &max_pages);
    484  +
400 485          /*
401 486           * start of file
402 487           */
403      -        if (!offset)
    488  +        if (!index)
404 489                  goto initial_readahead;
405 490
406 491          /*
407      -         * It's the expected callback offset, assume sequential access.
    492  +         * It's the expected callback index, assume sequential access.
408 493           * Ramp up sizes, and push forward the readahead window.
409 494           */
410      -        if ((offset == (ra->start + ra->size - ra->async_size) ||
411      -            offset == (ra->start + ra->size))) {
    495  +        if ((index == (ra->start + ra->size - ra->async_size) ||
    496  +            index == (ra->start + ra->size))) {
412 497                  ra->start += ra->size;
413 498                  ra->size = get_next_ra_size(ra, max_pages);
414 499                  ra->async_size = ra->size;
 ..  ..
425 510                  pgoff_t start;
426 511
427 512                  rcu_read_lock();
428      -                start = page_cache_next_hole(mapping, offset + 1, max_pages);
    513  +                start = page_cache_next_miss(ractl->mapping, index + 1,
    514  +                                max_pages);
429 515                  rcu_read_unlock();
430 516
431      -                if (!start || start - offset > max_pages)
432      -                        return 0;
    517  +                if (!start || start - index > max_pages)
    518  +                        return;
433 519
434 520                  ra->start = start;
435      -                ra->size = start - offset;        /* old async_size */
    521  +                ra->size = start - index;        /* old async_size */
436 522                  ra->size += req_size;
437 523                  ra->size = get_next_ra_size(ra, max_pages);
438 524                  ra->async_size = ra->size;
 ..  ..
447 533
448 534          /*
449 535           * sequential cache miss
450      -         * trivial case: (offset - prev_offset) == 1
451      -         * unaligned reads: (offset - prev_offset) == 0
    536  +         * trivial case: (index - prev_index) == 1
    537  +         * unaligned reads: (index - prev_index) == 0
452 538           */
453      -        prev_offset = (unsigned long long)ra->prev_pos >> PAGE_SHIFT;
454      -        if (offset - prev_offset <= 1UL)
    539  +        prev_index = (unsigned long long)ra->prev_pos >> PAGE_SHIFT;
    540  +        if (index - prev_index <= 1UL)
455 541                  goto initial_readahead;
456 542
457 543          /*
458 544           * Query the page cache and look for the traces(cached history pages)
459 545           * that a sequential stream would leave behind.
460 546           */
461      -        if (try_context_readahead(mapping, ra, offset, req_size, max_pages))
    547  +        if (try_context_readahead(ractl->mapping, ra, index, req_size,
    548  +                        max_pages))
462 549                  goto readit;
463 550
464 551          /*
465 552           * standalone, small random read
466 553           * Read as is, and do not pollute the readahead state.
467 554           */
468      -        return __do_page_cache_readahead(mapping, filp, offset, req_size, 0);
    555  +        do_page_cache_ra(ractl, req_size, 0);
    556  +        return;
469 557
470 558   initial_readahead:
471      -        ra->start = offset;
    559  +        ra->start = index;
472 560          ra->size = get_init_ra_size(req_size, max_pages);
473 561          ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;
474 562
 ..  ..
479 567           * the resulted next readahead window into the current one.
480 568           * Take care of maximum IO pages as above.
481 569           */
482      -        if (offset == ra->start && ra->size == ra->async_size) {
    570  +        if (index == ra->start && ra->size == ra->async_size) {
483 571                  add_pages = get_next_ra_size(ra, max_pages);
484 572                  if (ra->size + add_pages <= max_pages) {
485 573                          ra->async_size = add_pages;
 ..  ..
490 578                  }
491 579          }
492 580
493      -        return ra_submit(ra, mapping, filp);
    581  +        ractl->_index = ra->start;
    582  +        do_page_cache_ra(ractl, ra->size, ra->async_size);
494 583   }
495 584
496      -/**
497      - * page_cache_sync_readahead - generic file readahead
498      - * @mapping: address_space which holds the pagecache and I/O vectors
499      - * @ra: file_ra_state which holds the readahead state
500      - * @filp: passed on to ->readpage() and ->readpages()
501      - * @offset: start offset into @mapping, in pagecache page-sized units
502      - * @req_size: hint: total size of the read which the caller is performing in
503      - *            pagecache pages
504      - *
505      - * page_cache_sync_readahead() should be called when a cache miss happened:
506      - * it will submit the read. The readahead logic may decide to piggyback more
507      - * pages onto the read request if access patterns suggest it will improve
508      - * performance.
509      - */
510      -void page_cache_sync_readahead(struct address_space *mapping,
511      -                               struct file_ra_state *ra, struct file *filp,
512      -                               pgoff_t offset, unsigned long req_size)
    585  +void page_cache_sync_ra(struct readahead_control *ractl,
    586  +                struct file_ra_state *ra, unsigned long req_count)
513 587   {
514      -        /* no read-ahead */
515      -        if (!ra->ra_pages)
516      -                return;
    588  +        bool do_forced_ra = ractl->file && (ractl->file->f_mode & FMODE_RANDOM);
517 589
518      -        if (blk_cgroup_congested())
519      -                return;
    590  +        /*
    591  +         * Even if read-ahead is disabled, issue this request as read-ahead
    592  +         * as we'll need it to satisfy the requested range. The forced
    593  +         * read-ahead will do the right thing and limit the read to just the
    594  +         * requested range, which we'll set to 1 page for this case.
    595  +         */
    596  +        if (!ra->ra_pages || blk_cgroup_congested()) {
    597  +                if (!ractl->file)
    598  +                        return;
    599  +                req_count = 1;
    600  +                do_forced_ra = true;
    601  +        }
520 602
521 603          /* be dumb */
522      -        if (filp && (filp->f_mode & FMODE_RANDOM)) {
523      -                force_page_cache_readahead(mapping, filp, offset, req_size);
    604  +        if (do_forced_ra) {
    605  +                force_page_cache_ra(ractl, ra, req_count);
524 606                  return;
525 607          }
526 608
527 609          /* do read-ahead */
528      -        ondemand_readahead(mapping, ra, filp, false, offset, req_size);
    610  +        ondemand_readahead(ractl, ra, false, req_count);
529 611   }
530      -EXPORT_SYMBOL_GPL(page_cache_sync_readahead);
    612  +EXPORT_SYMBOL_GPL(page_cache_sync_ra);
531 613
532      -/**
533      - * page_cache_async_readahead - file readahead for marked pages
534      - * @mapping: address_space which holds the pagecache and I/O vectors
535      - * @ra: file_ra_state which holds the readahead state
536      - * @filp: passed on to ->readpage() and ->readpages()
537      - * @page: the page at @offset which has the PG_readahead flag set
538      - * @offset: start offset into @mapping, in pagecache page-sized units
539      - * @req_size: hint: total size of the read which the caller is performing in
540      - *            pagecache pages
541      - *
542      - * page_cache_async_readahead() should be called when a page is used which
543      - * has the PG_readahead flag; this is a marker to suggest that the application
544      - * has used up enough of the readahead window that we should start pulling in
545      - * more pages.
546      - */
547      -void
548      -page_cache_async_readahead(struct address_space *mapping,
549      -                           struct file_ra_state *ra, struct file *filp,
550      -                           struct page *page, pgoff_t offset,
551      -                           unsigned long req_size)
    614  +void page_cache_async_ra(struct readahead_control *ractl,
    615  +                struct file_ra_state *ra, struct page *page,
    616  +                unsigned long req_count)
552 617   {
553 618          /* no read-ahead */
554 619          if (!ra->ra_pages)
 ..  ..
565 630          /*
566 631           * Defer asynchronous read-ahead on IO congestion.
567 632           */
568      -        if (inode_read_congested(mapping->host))
    633  +        if (inode_read_congested(ractl->mapping->host))
569 634                  return;
570 635
571 636          if (blk_cgroup_congested())
572 637                  return;
573 638
574 639          /* do read-ahead */
575      -        ondemand_readahead(mapping, ra, filp, true, offset, req_size);
    640  +        ondemand_readahead(ractl, ra, true, req_count);
576 641   }
577      -EXPORT_SYMBOL_GPL(page_cache_async_readahead);
    642  +EXPORT_SYMBOL_GPL(page_cache_async_ra);
578 643
579 644   ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
580 645   {
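
The core of the change above is the replacement of the `->readpages` path with `struct readahead_control` and the new `->readahead` address-space operation. As a rough sketch (not part of this patch), a filesystem implementation against the helpers visible in this diff looks like the following; `example_fill_page()` is a hypothetical stand-in for the filesystem's real block-mapping and I/O submission:

```c
/*
 * Sketch only. Pages handed to ->readahead are already locked and
 * present in the page cache; we own one reference per page returned
 * by readahead_page() and must put_page() it, exactly as the cleanup
 * loops in read_pages() above do. A page we leave !uptodate is simply
 * retried later through ->readpage, which is also where any I/O error
 * gets reported to the reader.
 */
static void example_readahead(struct readahead_control *rac)
{
        struct page *page;

        while ((page = readahead_page(rac))) {
                /* Hypothetical helper: starts the read and unlocks the
                 * page on completion, successful or not. */
                example_fill_page(rac->file, page);
                put_page(page);
        }
}
```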
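Call sites do not have to assemble a `readahead_control` by hand: the deleted `page_cache_sync_readahead()` entry point survives for callers as a thin inline wrapper in `<linux/pagemap.h>` in this conversion. A sketch along those lines, assuming the `readahead_control` fields used throughout this diff (`file`, `mapping`, `_index`), mirroring the `DEFINE_READAHEAD()` initializer:

```c
/* Approximation of the compatibility wrapper for the old signature;
 * the readahead_control is built on the stack and passed down. */
static inline void page_cache_sync_readahead(struct address_space *mapping,
                struct file_ra_state *ra, struct file *file,
                pgoff_t index, unsigned long req_count)
{
        struct readahead_control ractl = {
                .file = file,
                .mapping = mapping,
                ._index = index,
        };

        page_cache_sync_ra(&ractl, ra, req_count);
}
```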
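The `get_next_ra_size()` rewrite is behaviour-preserving: for every `cur` it returns the same value the old `min(newsize, max)` form did, it just returns early instead of clamping. Worked numbers with `max = 128` pages (a 512KB window with 4KB pages):

```c
/*
 * cur =   4:   4 < 128/16 (= 8)   ->  4 * cur =  16
 * cur =  16:  16 <= 128/2 (= 64)  ->  2 * cur =  32
 * cur =  32:  32 <= 64            ->  2 * cur =  64
 * cur =  64:  64 <= 64            ->  2 * cur = 128
 * cur = 100: 100 > 64             ->  max     = 128
 */
```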
|---|