.. | .. |
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
1 | 2 | /* |
2 | 3 | * mm/readahead.c - address_space-level file readahead. |
3 | 4 | * |
.. | .. |
21 | 22 | #include <linux/mm_inline.h> |
22 | 23 | #include <linux/blk-cgroup.h> |
23 | 24 | #include <linux/fadvise.h> |
| 25 | +#include <linux/sched/mm.h> |
| 26 | +#include <trace/hooks/mm.h> |
24 | 27 | |
25 | 28 | #include "internal.h" |
| 29 | + |
| 30 | +#if defined(CONFIG_ARCH_ROCKCHIP) && defined(CONFIG_NO_GKI) |
| 31 | +#include <linux/fscrypt.h> |
| 32 | +#endif |
26 | 33 | |
27 | 34 | /* |
28 | 35 | * Initialise a struct file's readahead state. Assumes that the caller has |
.. | .. |
81 | 88 | * @data: private data for the callback routine. |
82 | 89 | * |
83 | 90 | * Hides the details of the LRU cache etc from the filesystems. |
| 91 | + * |
| 92 | + * Returns: %0 on success, error return by @filler otherwise |
84 | 93 | */ |
85 | 94 | int read_cache_pages(struct address_space *mapping, struct list_head *pages, |
86 | 95 | int (*filler)(void *, struct page *), void *data) |
.. | .. |
110 | 119 | |
111 | 120 | EXPORT_SYMBOL(read_cache_pages); |
112 | 121 | |
113 | | -static int read_pages(struct address_space *mapping, struct file *filp, |
114 | | - struct list_head *pages, unsigned int nr_pages, gfp_t gfp) |
| 122 | +gfp_t readahead_gfp_mask(struct address_space *x) |
115 | 123 | { |
| 124 | + gfp_t mask = mapping_gfp_mask(x) | __GFP_NORETRY | __GFP_NOWARN; |
| 125 | + |
| 126 | + trace_android_rvh_set_readahead_gfp_mask(&mask); |
| 127 | + return mask; |
| 128 | +} |
| 129 | +EXPORT_SYMBOL_GPL(readahead_gfp_mask); |
| 130 | + |
| 131 | +static void read_pages(struct readahead_control *rac, struct list_head *pages, |
| 132 | + bool skip_page) |
| 133 | +{ |
| 134 | + const struct address_space_operations *aops = rac->mapping->a_ops; |
| 135 | + struct page *page; |
116 | 136 | struct blk_plug plug; |
117 | | - unsigned page_idx; |
118 | | - int ret; |
| 137 | + |
| 138 | + if (!readahead_count(rac)) |
| 139 | + goto out; |
119 | 140 | |
120 | 141 | blk_start_plug(&plug); |
121 | 142 | |
122 | | - if (mapping->a_ops->readpages) { |
123 | | - ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages); |
| 143 | + if (aops->readahead) { |
| 144 | + aops->readahead(rac); |
| 145 | + /* Clean up the remaining pages */ |
| 146 | + while ((page = readahead_page(rac))) { |
| 147 | + unlock_page(page); |
| 148 | + put_page(page); |
| 149 | + } |
| 150 | + } else if (aops->readpages) { |
| 151 | + aops->readpages(rac->file, rac->mapping, pages, |
| 152 | + readahead_count(rac)); |
124 | 153 | /* Clean up the remaining pages */ |
125 | 154 | put_pages_list(pages); |
126 | | - goto out; |
| 155 | + rac->_index += rac->_nr_pages; |
| 156 | + rac->_nr_pages = 0; |
| 157 | + } else { |
| 158 | + while ((page = readahead_page(rac))) { |
| 159 | + aops->readpage(rac->file, page); |
| 160 | + put_page(page); |
| 161 | + } |
127 | 162 | } |
128 | 163 | |
129 | | - for (page_idx = 0; page_idx < nr_pages; page_idx++) { |
130 | | - struct page *page = lru_to_page(pages); |
131 | | - list_del(&page->lru); |
132 | | - if (!add_to_page_cache_lru(page, mapping, page->index, gfp)) |
133 | | - mapping->a_ops->readpage(filp, page); |
134 | | - put_page(page); |
135 | | - } |
136 | | - ret = 0; |
137 | | - |
138 | | -out: |
139 | 164 | blk_finish_plug(&plug); |
140 | 165 | |
141 | | - return ret; |
| 166 | + BUG_ON(!list_empty(pages)); |
| 167 | + BUG_ON(readahead_count(rac)); |
| 168 | + |
| 169 | +out: |
| 170 | + if (skip_page) |
| 171 | + rac->_index++; |
142 | 172 | } |
143 | 173 | |
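For context, the reworked read_pages() above prefers the new ->readahead() address_space operation over ->readpages(), falling back to per-page ->readpage() only when neither is present. A filesystem-side consumer of the readahead_control is not part of this patch; a minimal, fully synchronous sketch of such a hook (the myfs_* names are hypothetical) might look like:

	static void myfs_readahead(struct readahead_control *rac)
	{
		struct page *page;

		/* Each page comes locked, with an elevated refcount. */
		while ((page = readahead_page(rac))) {
			if (myfs_fill_page(rac->file, page) == 0)
				SetPageUptodate(page);
			unlock_page(page);	/* readers may now proceed */
			put_page(page);		/* drop readahead's reference */
		}
	}

A real implementation would normally batch the pages into bios and unlock them from its I/O completion path instead; any page the hook leaves unconsumed is unlocked and released by the cleanup loop in read_pages() above, so it will simply be read again on demand.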
144 | | -/* |
145 | | - * __do_page_cache_readahead() actually reads a chunk of disk. It allocates |
146 | | - * the pages first, then submits them for I/O. This avoids the very bad |
147 | | - * behaviour which would occur if page allocations are causing VM writeback. |
148 | | - * We really don't want to intermingle reads and writes like that. |
| 174 | +/** |
| 175 | + * page_cache_ra_unbounded - Start unchecked readahead. |
| 176 | + * @ractl: Readahead control. |
| 177 | + * @nr_to_read: The number of pages to read. |
| 178 | + * @lookahead_size: Where to start the next readahead. |
149 | 179 | * |
150 | | - * Returns the number of pages requested, or the maximum amount of I/O allowed. |
| 180 | + * This function is for filesystems to call when they want to start |
| 181 | + * readahead beyond a file's stated i_size. This is almost certainly |
| 182 | + * not the function you want to call. Use page_cache_async_readahead() |
| 183 | + * or page_cache_sync_readahead() instead. |
| 184 | + * |
| 185 | + * Context: File is referenced by caller. Mutexes may be held by caller. |
| 186 | + * May sleep, but will not reenter filesystem to reclaim memory. |
151 | 187 | */ |
152 | | -unsigned int __do_page_cache_readahead(struct address_space *mapping, |
153 | | - struct file *filp, pgoff_t offset, unsigned long nr_to_read, |
154 | | - unsigned long lookahead_size) |
| 188 | +void page_cache_ra_unbounded(struct readahead_control *ractl, |
| 189 | + unsigned long nr_to_read, unsigned long lookahead_size) |
155 | 190 | { |
156 | | - struct inode *inode = mapping->host; |
157 | | - struct page *page; |
158 | | - unsigned long end_index; /* The last page we want to read */ |
| 191 | + struct address_space *mapping = ractl->mapping; |
| 192 | + unsigned long index = readahead_index(ractl); |
159 | 193 | LIST_HEAD(page_pool); |
160 | | - int page_idx; |
161 | | - unsigned int nr_pages = 0; |
162 | | - loff_t isize = i_size_read(inode); |
163 | 194 | gfp_t gfp_mask = readahead_gfp_mask(mapping); |
| 195 | + unsigned long i; |
164 | 196 | |
165 | | - if (isize == 0) |
166 | | - goto out; |
167 | | - |
168 | | - end_index = ((isize - 1) >> PAGE_SHIFT); |
| 197 | + /* |
| 198 | + * Partway through the readahead operation, we will have added |
| 199 | + * locked pages to the page cache, but will not yet have submitted |
| 200 | + * them for I/O. Adding another page may need to allocate memory, |
| 201 | + * which can trigger memory reclaim. Telling the VM we're in |
| 202 | + * the middle of a filesystem operation will cause it to not |
| 203 | + * touch file-backed pages, preventing a deadlock. Most (all?) |
| 204 | + * filesystems already specify __GFP_NOFS in their mapping's |
| 205 | + * gfp_mask, but let's be explicit here. |
| 206 | + */ |
| 207 | + unsigned int nofs = memalloc_nofs_save(); |
169 | 208 | |
170 | 209 | /* |
171 | 210 | * Preallocate as many pages as we will need. |
172 | 211 | */ |
173 | | - for (page_idx = 0; page_idx < nr_to_read; page_idx++) { |
174 | | - pgoff_t page_offset = offset + page_idx; |
| 212 | + for (i = 0; i < nr_to_read; i++) { |
| 213 | + struct page *page = xa_load(&mapping->i_pages, index + i); |
175 | 214 | |
176 | | - if (page_offset > end_index) |
177 | | - break; |
| 215 | + BUG_ON(index + i != ractl->_index + ractl->_nr_pages); |
178 | 216 | |
179 | | - rcu_read_lock(); |
180 | | - page = radix_tree_lookup(&mapping->i_pages, page_offset); |
181 | | - rcu_read_unlock(); |
182 | | - if (page && !radix_tree_exceptional_entry(page)) { |
| 217 | + if (page && !xa_is_value(page)) { |
183 | 218 | /* |
184 | | - * Page already present? Kick off the current batch of |
185 | | - * contiguous pages before continuing with the next |
186 | | - * batch. |
| 219 | + * Page already present? Kick off the current batch |
| 220 | + * of contiguous pages before continuing with the |
| 221 | + * next batch. This page may be the one we would |
| 222 | + * have intended to mark as Readahead, but we don't |
| 223 | + * have a stable reference to this page, and it's |
| 224 | + * not worth getting one just for that. |
187 | 225 | */ |
188 | | - if (nr_pages) |
189 | | - read_pages(mapping, filp, &page_pool, nr_pages, |
190 | | - gfp_mask); |
191 | | - nr_pages = 0; |
| 226 | + read_pages(ractl, &page_pool, true); |
192 | 227 | continue; |
193 | 228 | } |
194 | 229 | |
195 | 230 | page = __page_cache_alloc(gfp_mask); |
196 | 231 | if (!page) |
197 | 232 | break; |
198 | | - page->index = page_offset; |
199 | | - list_add(&page->lru, &page_pool); |
200 | | - if (page_idx == nr_to_read - lookahead_size) |
| 233 | + if (mapping->a_ops->readpages) { |
| 234 | + page->index = index + i; |
| 235 | + list_add(&page->lru, &page_pool); |
| 236 | + } else if (add_to_page_cache_lru(page, mapping, index + i, |
| 237 | + gfp_mask) < 0) { |
| 238 | + put_page(page); |
| 239 | + read_pages(ractl, &page_pool, true); |
| 240 | + continue; |
| 241 | + } |
| 242 | + if (i == nr_to_read - lookahead_size) |
201 | 243 | SetPageReadahead(page); |
202 | | - nr_pages++; |
| 244 | + ractl->_nr_pages++; |
203 | 245 | } |
204 | 246 | |
205 | 247 | /* |
.. | .. |
207 | 249 | * uptodate then the caller will launch readpage again, and |
208 | 250 | * will then handle the error. |
209 | 251 | */ |
210 | | - if (nr_pages) |
211 | | - read_pages(mapping, filp, &page_pool, nr_pages, gfp_mask); |
212 | | - BUG_ON(!list_empty(&page_pool)); |
213 | | -out: |
214 | | - return nr_pages; |
| 252 | + read_pages(ractl, &page_pool, false); |
| 253 | + memalloc_nofs_restore(nofs); |
| 254 | +} |
| 255 | +EXPORT_SYMBOL_GPL(page_cache_ra_unbounded); |
| 256 | + |
| 257 | +/* |
| 258 | + * do_page_cache_ra() actually reads a chunk of disk. It allocates |
| 259 | + * the pages first, then submits them for I/O. This avoids the very bad |
| 260 | + * behaviour which would occur if page allocations are causing VM writeback. |
| 261 | + * We really don't want to intermingle reads and writes like that. |
| 262 | + */ |
| 263 | +void do_page_cache_ra(struct readahead_control *ractl, |
| 264 | + unsigned long nr_to_read, unsigned long lookahead_size) |
| 265 | +{ |
| 266 | + struct inode *inode = ractl->mapping->host; |
| 267 | + unsigned long index = readahead_index(ractl); |
| 268 | + loff_t isize = i_size_read(inode); |
| 269 | + pgoff_t end_index; /* The last page we want to read */ |
| 270 | + |
| 271 | + if (isize == 0) |
| 272 | + return; |
| 273 | + |
| 274 | + end_index = (isize - 1) >> PAGE_SHIFT; |
| 275 | + if (index > end_index) |
| 276 | + return; |
| 277 | + /* Don't read past the page containing the last byte of the file */ |
| 278 | + if (nr_to_read > end_index - index) |
| 279 | + nr_to_read = end_index - index + 1; |
| 280 | + |
| 281 | + page_cache_ra_unbounded(ractl, nr_to_read, lookahead_size); |
215 | 282 | } |
216 | 283 | |
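Note the split above: do_page_cache_ra() clamps nr_to_read to the page containing the last byte of the file before handing off, while page_cache_ra_unbounded() performs no such check and is, per its kernel-doc, only for filesystems that deliberately read past the stated i_size. A caller builds a readahead_control first; assuming the DEFINE_READAHEAD() helper from the matching include/linux/pagemap.h side of this change (not shown in this hunk), a hedged sketch of such a filesystem-internal caller (myfs_ra_tail is hypothetical) could be:

	/* Read nr pages starting at index, even past i_size. */
	static void myfs_ra_tail(struct file *file, pgoff_t index,
				 unsigned long nr)
	{
		DEFINE_READAHEAD(ractl, file, file->f_mapping, index);

		page_cache_ra_unbounded(&ractl, nr, 0);
	}

Everything else in this file goes through do_page_cache_ra() and therefore stays bounded by i_size.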
217 | 284 | /* |
218 | 285 | * Chunk the readahead into 2 megabyte units, so that we don't pin too much |
219 | 286 | * memory at once. |
220 | 287 | */ |
221 | | -int force_page_cache_readahead(struct address_space *mapping, struct file *filp, |
222 | | - pgoff_t offset, unsigned long nr_to_read) |
| 288 | +void force_page_cache_ra(struct readahead_control *ractl, |
| 289 | + struct file_ra_state *ra, unsigned long nr_to_read) |
223 | 290 | { |
| 291 | + struct address_space *mapping = ractl->mapping; |
224 | 292 | struct backing_dev_info *bdi = inode_to_bdi(mapping->host); |
225 | | - struct file_ra_state *ra = &filp->f_ra; |
226 | | - unsigned long max_pages; |
| 293 | + unsigned long max_pages, index; |
| 294 | +#if defined(CONFIG_ARCH_ROCKCHIP) && defined(CONFIG_NO_GKI) |
| 295 | + bool force_lookahead = false; |
| 296 | +#endif |
227 | 297 | |
228 | | - if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages)) |
229 | | - return -EINVAL; |
| 298 | + if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages && |
| 299 | + !mapping->a_ops->readahead)) |
| 300 | + return; |
230 | 301 | |
231 | 302 | /* |
232 | 303 | * If the request exceeds the readahead window, allow the read to |
233 | 304 | * be up to the optimal hardware IO size |
234 | 305 | */ |
| 306 | + index = readahead_index(ractl); |
235 | 307 | max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages); |
236 | | - nr_to_read = min(nr_to_read, max_pages); |
| 308 | +#if defined(CONFIG_ARCH_ROCKCHIP) && defined(CONFIG_NO_GKI) |
| 309 | + /* For files with fscrypt enabled, to allow IO and the encryption |
| 310 | + * or decryption process to ping-pong, lookahead is forcibly enabled. |
| 311 | + */ |
| 312 | + if (nr_to_read > max_pages && fscrypt_inode_uses_fs_layer_crypto(mapping->host)) |
| 313 | + force_lookahead = true; |
| 314 | +#endif |
| 315 | + nr_to_read = min_t(unsigned long, nr_to_read, max_pages); |
237 | 316 | while (nr_to_read) { |
238 | 317 | unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_SIZE; |
239 | 318 | |
240 | 319 | if (this_chunk > nr_to_read) |
241 | 320 | this_chunk = nr_to_read; |
242 | | - __do_page_cache_readahead(mapping, filp, offset, this_chunk, 0); |
| 321 | + ractl->_index = index; |
| 322 | +#if defined(CONFIG_ARCH_ROCKCHIP) && defined(CONFIG_NO_GKI) |
| 323 | + if (force_lookahead) |
| 324 | + do_page_cache_ra(ractl, this_chunk, this_chunk / 2); |
| 325 | + else |
| 326 | + do_page_cache_ra(ractl, this_chunk, 0); |
| 327 | +#else |
| 328 | + do_page_cache_ra(ractl, this_chunk, 0); |
| 329 | +#endif |
243 | 330 | |
244 | | - offset += this_chunk; |
| 331 | + index += this_chunk; |
245 | 332 | nr_to_read -= this_chunk; |
246 | 333 | } |
247 | | - return 0; |
248 | 334 | } |
249 | 335 | |
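To put rough numbers on the chunking in force_page_cache_ra(): with 4 KiB pages, (2 * 1024 * 1024) / PAGE_SIZE works out to 512 pages, so a request that survives the max_pages clamp at, say, 1024 pages is issued as two back-to-back do_page_cache_ra() calls of 512 pages each, with ractl->_index reset to the running index before each call.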
250 | 336 | /* |
.. | .. |
272 | 358 | * return it as the new window size. |
273 | 359 | */ |
274 | 360 | static unsigned long get_next_ra_size(struct file_ra_state *ra, |
275 | | - unsigned long max) |
| 361 | + unsigned long max) |
276 | 362 | { |
277 | 363 | unsigned long cur = ra->size; |
278 | | - unsigned long newsize; |
279 | 364 | |
280 | 365 | if (cur < max / 16) |
281 | | - newsize = 4 * cur; |
282 | | - else |
283 | | - newsize = 2 * cur; |
284 | | - |
285 | | - return min(newsize, max); |
| 366 | + return 4 * cur; |
| 367 | + if (cur <= max / 2) |
| 368 | + return 2 * cur; |
| 369 | + return max; |
286 | 370 | } |
287 | 371 | |
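For illustration, with a window cap of max = 128 pages (512 KiB at 4 KiB pages), a window currently sized at 4 pages grows 4 -> 16 -> 32 -> 64 -> 128 on successive sequential hits: quadrupling while cur < max/16, doubling while cur <= max/2, then pinning at max. The behaviour matches the old min()-based form; the rewrite simply returns max directly once cur exceeds max/2 instead of computing 2 * cur and clamping.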
288 | 372 | /* |
.. | .. |
325 | 409 | */ |
326 | 410 | |
327 | 411 | /* |
328 | | - * Count contiguously cached pages from @offset-1 to @offset-@max, |
| 412 | + * Count contiguously cached pages from @index-1 to @index-@max, |
329 | 413 | * this count is a conservative estimation of |
330 | 414 | * - length of the sequential read sequence, or |
331 | 415 | * - thrashing threshold in memory tight systems |
332 | 416 | */ |
333 | 417 | static pgoff_t count_history_pages(struct address_space *mapping, |
334 | | - pgoff_t offset, unsigned long max) |
| 418 | + pgoff_t index, unsigned long max) |
335 | 419 | { |
336 | 420 | pgoff_t head; |
337 | 421 | |
338 | 422 | rcu_read_lock(); |
339 | | - head = page_cache_prev_hole(mapping, offset - 1, max); |
| 423 | + head = page_cache_prev_miss(mapping, index - 1, max); |
340 | 424 | rcu_read_unlock(); |
341 | 425 | |
342 | | - return offset - 1 - head; |
| 426 | + return index - 1 - head; |
343 | 427 | } |
344 | 428 | |
345 | 429 | /* |
.. | .. |
347 | 431 | */ |
348 | 432 | static int try_context_readahead(struct address_space *mapping, |
349 | 433 | struct file_ra_state *ra, |
350 | | - pgoff_t offset, |
| 434 | + pgoff_t index, |
351 | 435 | unsigned long req_size, |
352 | 436 | unsigned long max) |
353 | 437 | { |
354 | 438 | pgoff_t size; |
355 | 439 | |
356 | | - size = count_history_pages(mapping, offset, max); |
| 440 | + size = count_history_pages(mapping, index, max); |
357 | 441 | |
358 | 442 | /* |
359 | 443 | * not enough history pages: |
.. | .. |
366 | 450 | * starts from beginning of file: |
367 | 451 | * it is a strong indication of long-run stream (or whole-file-read) |
368 | 452 | */ |
369 | | - if (size >= offset) |
| 453 | + if (size >= index) |
370 | 454 | size *= 2; |
371 | 455 | |
372 | | - ra->start = offset; |
| 456 | + ra->start = index; |
373 | 457 | ra->size = min(size + req_size, max); |
374 | 458 | ra->async_size = 1; |
375 | 459 | |
.. | .. |
379 | 463 | /* |
380 | 464 | * A minimal readahead algorithm for trivial sequential/random reads. |
381 | 465 | */ |
382 | | -static unsigned long |
383 | | -ondemand_readahead(struct address_space *mapping, |
384 | | - struct file_ra_state *ra, struct file *filp, |
385 | | - bool hit_readahead_marker, pgoff_t offset, |
386 | | - unsigned long req_size) |
| 466 | +static void ondemand_readahead(struct readahead_control *ractl, |
| 467 | + struct file_ra_state *ra, bool hit_readahead_marker, |
| 468 | + unsigned long req_size) |
387 | 469 | { |
388 | | - struct backing_dev_info *bdi = inode_to_bdi(mapping->host); |
| 470 | + struct backing_dev_info *bdi = inode_to_bdi(ractl->mapping->host); |
389 | 471 | unsigned long max_pages = ra->ra_pages; |
390 | 472 | unsigned long add_pages; |
391 | | - pgoff_t prev_offset; |
| 473 | + unsigned long index = readahead_index(ractl); |
| 474 | + pgoff_t prev_index; |
392 | 475 | |
393 | 476 | /* |
394 | 477 | * If the request exceeds the readahead window, allow the read to |
.. | .. |
397 | 480 | if (req_size > max_pages && bdi->io_pages > max_pages) |
398 | 481 | max_pages = min(req_size, bdi->io_pages); |
399 | 482 | |
| 483 | + trace_android_vh_ra_tuning_max_page(ractl, &max_pages); |
| 484 | + |
400 | 485 | /* |
401 | 486 | * start of file |
402 | 487 | */ |
403 | | - if (!offset) |
| 488 | + if (!index) |
404 | 489 | goto initial_readahead; |
405 | 490 | |
406 | 491 | /* |
407 | | - * It's the expected callback offset, assume sequential access. |
| 492 | + * It's the expected callback index, assume sequential access. |
408 | 493 | * Ramp up sizes, and push forward the readahead window. |
409 | 494 | */ |
410 | | - if ((offset == (ra->start + ra->size - ra->async_size) || |
411 | | - offset == (ra->start + ra->size))) { |
| 495 | + if ((index == (ra->start + ra->size - ra->async_size) || |
| 496 | + index == (ra->start + ra->size))) { |
412 | 497 | ra->start += ra->size; |
413 | 498 | ra->size = get_next_ra_size(ra, max_pages); |
414 | 499 | ra->async_size = ra->size; |
.. | .. |
425 | 510 | pgoff_t start; |
426 | 511 | |
427 | 512 | rcu_read_lock(); |
428 | | - start = page_cache_next_hole(mapping, offset + 1, max_pages); |
| 513 | + start = page_cache_next_miss(ractl->mapping, index + 1, |
| 514 | + max_pages); |
429 | 515 | rcu_read_unlock(); |
430 | 516 | |
431 | | - if (!start || start - offset > max_pages) |
432 | | - return 0; |
| 517 | + if (!start || start - index > max_pages) |
| 518 | + return; |
433 | 519 | |
434 | 520 | ra->start = start; |
435 | | - ra->size = start - offset; /* old async_size */ |
| 521 | + ra->size = start - index; /* old async_size */ |
436 | 522 | ra->size += req_size; |
437 | 523 | ra->size = get_next_ra_size(ra, max_pages); |
438 | 524 | ra->async_size = ra->size; |
.. | .. |
447 | 533 | |
448 | 534 | /* |
449 | 535 | * sequential cache miss |
450 | | - * trivial case: (offset - prev_offset) == 1 |
451 | | - * unaligned reads: (offset - prev_offset) == 0 |
| 536 | + * trivial case: (index - prev_index) == 1 |
| 537 | + * unaligned reads: (index - prev_index) == 0 |
452 | 538 | */ |
453 | | - prev_offset = (unsigned long long)ra->prev_pos >> PAGE_SHIFT; |
454 | | - if (offset - prev_offset <= 1UL) |
| 539 | + prev_index = (unsigned long long)ra->prev_pos >> PAGE_SHIFT; |
| 540 | + if (index - prev_index <= 1UL) |
455 | 541 | goto initial_readahead; |
456 | 542 | |
457 | 543 | /* |
458 | 544 | * Query the page cache and look for the traces(cached history pages) |
459 | 545 | * that a sequential stream would leave behind. |
460 | 546 | */ |
461 | | - if (try_context_readahead(mapping, ra, offset, req_size, max_pages)) |
| 547 | + if (try_context_readahead(ractl->mapping, ra, index, req_size, |
| 548 | + max_pages)) |
462 | 549 | goto readit; |
463 | 550 | |
464 | 551 | /* |
465 | 552 | * standalone, small random read |
466 | 553 | * Read as is, and do not pollute the readahead state. |
467 | 554 | */ |
468 | | - return __do_page_cache_readahead(mapping, filp, offset, req_size, 0); |
| 555 | + do_page_cache_ra(ractl, req_size, 0); |
| 556 | + return; |
469 | 557 | |
470 | 558 | initial_readahead: |
471 | | - ra->start = offset; |
| 559 | + ra->start = index; |
472 | 560 | ra->size = get_init_ra_size(req_size, max_pages); |
473 | 561 | ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size; |
474 | 562 | |
.. | .. |
479 | 567 | * the resulted next readahead window into the current one. |
480 | 568 | * Take care of maximum IO pages as above. |
481 | 569 | */ |
482 | | - if (offset == ra->start && ra->size == ra->async_size) { |
| 570 | + if (index == ra->start && ra->size == ra->async_size) { |
483 | 571 | add_pages = get_next_ra_size(ra, max_pages); |
484 | 572 | if (ra->size + add_pages <= max_pages) { |
485 | 573 | ra->async_size = add_pages; |
.. | .. |
490 | 578 | } |
491 | 579 | } |
492 | 580 | |
493 | | - return ra_submit(ra, mapping, filp); |
| 581 | + ractl->_index = ra->start; |
| 582 | + do_page_cache_ra(ractl, ra->size, ra->async_size); |
494 | 583 | } |
495 | 584 | |
496 | | -/** |
497 | | - * page_cache_sync_readahead - generic file readahead |
498 | | - * @mapping: address_space which holds the pagecache and I/O vectors |
499 | | - * @ra: file_ra_state which holds the readahead state |
500 | | - * @filp: passed on to ->readpage() and ->readpages() |
501 | | - * @offset: start offset into @mapping, in pagecache page-sized units |
502 | | - * @req_size: hint: total size of the read which the caller is performing in |
503 | | - * pagecache pages |
504 | | - * |
505 | | - * page_cache_sync_readahead() should be called when a cache miss happened: |
506 | | - * it will submit the read. The readahead logic may decide to piggyback more |
507 | | - * pages onto the read request if access patterns suggest it will improve |
508 | | - * performance. |
509 | | - */ |
510 | | -void page_cache_sync_readahead(struct address_space *mapping, |
511 | | - struct file_ra_state *ra, struct file *filp, |
512 | | - pgoff_t offset, unsigned long req_size) |
| 585 | +void page_cache_sync_ra(struct readahead_control *ractl, |
| 586 | + struct file_ra_state *ra, unsigned long req_count) |
513 | 587 | { |
514 | | - /* no read-ahead */ |
515 | | - if (!ra->ra_pages) |
516 | | - return; |
| 588 | + bool do_forced_ra = ractl->file && (ractl->file->f_mode & FMODE_RANDOM); |
517 | 589 | |
518 | | - if (blk_cgroup_congested()) |
519 | | - return; |
| 590 | + /* |
| 591 | + * Even if read-ahead is disabled, issue this request as read-ahead |
| 592 | + * as we'll need it to satisfy the requested range. The forced |
| 593 | + * read-ahead will do the right thing and limit the read to just the |
| 594 | + * requested range, which we'll set to 1 page for this case. |
| 595 | + */ |
| 596 | + if (!ra->ra_pages || blk_cgroup_congested()) { |
| 597 | + if (!ractl->file) |
| 598 | + return; |
| 599 | + req_count = 1; |
| 600 | + do_forced_ra = true; |
| 601 | + } |
520 | 602 | |
521 | 603 | /* be dumb */ |
522 | | - if (filp && (filp->f_mode & FMODE_RANDOM)) { |
523 | | - force_page_cache_readahead(mapping, filp, offset, req_size); |
| 604 | + if (do_forced_ra) { |
| 605 | + force_page_cache_ra(ractl, ra, req_count); |
524 | 606 | return; |
525 | 607 | } |
526 | 608 | |
527 | 609 | /* do read-ahead */ |
528 | | - ondemand_readahead(mapping, ra, filp, false, offset, req_size); |
| 610 | + ondemand_readahead(ractl, ra, false, req_count); |
529 | 611 | } |
530 | | -EXPORT_SYMBOL_GPL(page_cache_sync_readahead); |
| 612 | +EXPORT_SYMBOL_GPL(page_cache_sync_ra); |
531 | 613 | |
532 | | -/** |
533 | | - * page_cache_async_readahead - file readahead for marked pages |
534 | | - * @mapping: address_space which holds the pagecache and I/O vectors |
535 | | - * @ra: file_ra_state which holds the readahead state |
536 | | - * @filp: passed on to ->readpage() and ->readpages() |
537 | | - * @page: the page at @offset which has the PG_readahead flag set |
538 | | - * @offset: start offset into @mapping, in pagecache page-sized units |
539 | | - * @req_size: hint: total size of the read which the caller is performing in |
540 | | - * pagecache pages |
541 | | - * |
542 | | - * page_cache_async_readahead() should be called when a page is used which |
543 | | - * has the PG_readahead flag; this is a marker to suggest that the application |
544 | | - * has used up enough of the readahead window that we should start pulling in |
545 | | - * more pages. |
546 | | - */ |
547 | | -void |
548 | | -page_cache_async_readahead(struct address_space *mapping, |
549 | | - struct file_ra_state *ra, struct file *filp, |
550 | | - struct page *page, pgoff_t offset, |
551 | | - unsigned long req_size) |
| 614 | +void page_cache_async_ra(struct readahead_control *ractl, |
| 615 | + struct file_ra_state *ra, struct page *page, |
| 616 | + unsigned long req_count) |
552 | 617 | { |
553 | 618 | /* no read-ahead */ |
554 | 619 | if (!ra->ra_pages) |
.. | .. |
565 | 630 | /* |
566 | 631 | * Defer asynchronous read-ahead on IO congestion. |
567 | 632 | */ |
568 | | - if (inode_read_congested(mapping->host)) |
| 633 | + if (inode_read_congested(ractl->mapping->host)) |
569 | 634 | return; |
570 | 635 | |
571 | 636 | if (blk_cgroup_congested()) |
572 | 637 | return; |
573 | 638 | |
574 | 639 | /* do read-ahead */ |
575 | | - ondemand_readahead(mapping, ra, filp, true, offset, req_size); |
| 640 | + ondemand_readahead(ractl, ra, true, req_count); |
576 | 641 | } |
577 | | -EXPORT_SYMBOL_GPL(page_cache_async_readahead); |
| 642 | +EXPORT_SYMBOL_GPL(page_cache_async_ra); |
578 | 643 | |
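The old entry points page_cache_sync_readahead() and page_cache_async_readahead() survive for callers (the new kernel-doc above still refers readers to them); with this split they are expected to become thin inline wrappers on the include/linux/pagemap.h side of the change, which is not part of this hunk. Roughly, and under the same DEFINE_READAHEAD() assumption as earlier:

	static inline
	void page_cache_sync_readahead(struct address_space *mapping,
			struct file_ra_state *ra, struct file *file,
			pgoff_t index, unsigned long req_count)
	{
		DEFINE_READAHEAD(ractl, file, mapping, index);
		page_cache_sync_ra(&ractl, ra, req_count);
	}

	static inline
	void page_cache_async_readahead(struct address_space *mapping,
			struct file_ra_state *ra, struct file *file,
			struct page *page, pgoff_t index,
			unsigned long req_count)
	{
		DEFINE_READAHEAD(ractl, file, mapping, index);
		page_cache_async_ra(&ractl, ra, page, req_count);
	}

The wrapper signatures mirror the functions removed above, so existing callers need no changes.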
579 | 644 | ssize_t ksys_readahead(int fd, loff_t offset, size_t count) |
580 | 645 | { |