@@ -6,6 +6,7 @@
  */
 
 #include <linux/memcontrol.h>
+#include <linux/mm_inline.h>
 #include <linux/writeback.h>
 #include <linux/shmem_fs.h>
 #include <linux/pagemap.h>
@@ -156,24 +157,24 @@
  *
  * Implementation
  *
- * For each node's file LRU lists, a counter for inactive evictions
- * and activations is maintained (node->inactive_age).
+ * For each node's LRU lists, a counter for inactive evictions and
+ * activations is maintained (node->nonresident_age).
  *
  * On eviction, a snapshot of this counter (along with some bits to
- * identify the node) is stored in the now empty page cache radix tree
+ * identify the node) is stored in the now empty page cache
  * slot of the evicted page. This is called a shadow entry.
  *
  * On cache misses for which there are shadow entries, an eligible
  * refault distance will immediately activate the refaulting page.
  */
 
-#define EVICTION_SHIFT	(RADIX_TREE_EXCEPTIONAL_ENTRY + \
+#define EVICTION_SHIFT	((BITS_PER_LONG - BITS_PER_XA_VALUE) + \
 			 1 + NODES_SHIFT + MEM_CGROUP_ID_SHIFT)
 #define EVICTION_MASK	(~0UL >> EVICTION_SHIFT)
 
 /*
  * Eviction timestamps need to be able to cover the full range of
- * actionable refaults. However, bits are tight in the radix tree
+ * actionable refaults. However, bits are tight in the xarray
  * entry, and after storing the identifier for the lruvec there might
  * not be enough left to represent every single actionable refault. In
  * that case, we have to sacrifice granularity for distance, and group
@@ -185,22 +186,21 @@
 			 bool workingset)
 {
 	eviction >>= bucket_order;
+	eviction &= EVICTION_MASK;
 	eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid;
 	eviction = (eviction << NODES_SHIFT) | pgdat->node_id;
 	eviction = (eviction << 1) | workingset;
-	eviction = (eviction << RADIX_TREE_EXCEPTIONAL_SHIFT);
 
-	return (void *)(eviction | RADIX_TREE_EXCEPTIONAL_ENTRY);
+	return xa_mk_value(eviction);
 }
 
 static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
 			  unsigned long *evictionp, bool *workingsetp)
 {
-	unsigned long entry = (unsigned long)shadow;
+	unsigned long entry = xa_to_value(shadow);
 	int memcgid, nid;
 	bool workingset;
 
-	entry >>= RADIX_TREE_EXCEPTIONAL_SHIFT;
 	workingset = entry & 1;
 	entry >>= 1;
 	nid = entry & ((1UL << NODES_SHIFT) - 1);
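
(Aside, not part of the patch: a minimal userspace sketch of how this
kind of shadow-entry bit-packing round-trips. The shift widths below are
made up for the demo; the kernel derives its own from MEM_CGROUP_ID_SHIFT,
NODES_SHIFT and BITS_PER_XA_VALUE.)

/* Illustrative only: pack/unpack round-trip of a shadow-style value. */
#include <assert.h>
#include <stdio.h>

#define DEMO_MEMCG_SHIFT	16
#define DEMO_NODES_SHIFT	6
#define DEMO_EVICTION_SHIFT	(DEMO_MEMCG_SHIFT + DEMO_NODES_SHIFT + 1)
#define DEMO_EVICTION_MASK	(~0UL >> DEMO_EVICTION_SHIFT)

static unsigned long demo_pack(int memcgid, int nid,
			       unsigned long eviction, int workingset)
{
	eviction &= DEMO_EVICTION_MASK;
	eviction = (eviction << DEMO_MEMCG_SHIFT) | memcgid;
	eviction = (eviction << DEMO_NODES_SHIFT) | nid;
	eviction = (eviction << 1) | workingset;
	return eviction;
}

static void demo_unpack(unsigned long entry, int *memcgid, int *nid,
			unsigned long *eviction, int *workingset)
{
	*workingset = entry & 1;
	entry >>= 1;
	*nid = entry & ((1UL << DEMO_NODES_SHIFT) - 1);
	entry >>= DEMO_NODES_SHIFT;
	*memcgid = entry & ((1UL << DEMO_MEMCG_SHIFT) - 1);
	entry >>= DEMO_MEMCG_SHIFT;
	*eviction = entry;
}

int main(void)
{
	unsigned long eviction;
	int memcgid, nid, workingset;

	demo_unpack(demo_pack(42, 3, 123456, 1),
		    &memcgid, &nid, &eviction, &workingset);
	assert(memcgid == 42 && nid == 3);
	assert(eviction == 123456 && workingset == 1);
	printf("memcg=%d nid=%d eviction=%lu workingset=%d\n",
	       memcgid, nid, eviction, workingset);
	return 0;
}
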
@@ -215,28 +215,58 @@
 }
 
 /**
+ * workingset_age_nonresident - age non-resident entries as LRU ages
+ * @lruvec: the lruvec that was aged
+ * @nr_pages: the number of pages to count
+ *
+ * As in-memory pages are aged, non-resident pages need to be aged as
+ * well, in order for the refault distances later on to be comparable
+ * to the in-memory dimensions. This function allows reclaim and LRU
+ * operations to drive the non-resident aging along in parallel.
+ */
+void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages)
+{
+	/*
+	 * Reclaiming a cgroup means reclaiming all its children in a
+	 * round-robin fashion. That means that each cgroup has an LRU
+	 * order that is composed of the LRU orders of its child
+	 * cgroups; and every page has an LRU position not just in the
+	 * cgroup that owns it, but in all of that group's ancestors.
+	 *
+	 * So when the physical inactive list of a leaf cgroup ages,
+	 * the virtual inactive lists of all its parents, including
+	 * the root cgroup's, age as well.
+	 */
+	do {
+		atomic_long_add(nr_pages, &lruvec->nonresident_age);
+	} while ((lruvec = parent_lruvec(lruvec)));
+}
+
+/**
  * workingset_eviction - note the eviction of a page from memory
- * @mapping: address space the page was backing
+ * @target_memcg: the cgroup that is causing the reclaim
  * @page: the page being evicted
  *
- * Returns a shadow entry to be stored in @mapping->i_pages in place
+ * Returns a shadow entry to be stored in @page->mapping->i_pages in place
  * of the evicted @page so that a later refault can be detected.
  */
-void *workingset_eviction(struct address_space *mapping, struct page *page)
+void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg)
 {
 	struct pglist_data *pgdat = page_pgdat(page);
-	struct mem_cgroup *memcg = page_memcg(page);
-	int memcgid = mem_cgroup_id(memcg);
 	unsigned long eviction;
 	struct lruvec *lruvec;
+	int memcgid;
 
 	/* Page is fully exclusive and pins page->mem_cgroup */
 	VM_BUG_ON_PAGE(PageLRU(page), page);
 	VM_BUG_ON_PAGE(page_count(page), page);
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 
-	lruvec = mem_cgroup_lruvec(pgdat, memcg);
-	eviction = atomic_long_inc_return(&lruvec->inactive_age);
+	lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
+	workingset_age_nonresident(lruvec, thp_nr_pages(page));
+	/* XXX: target_memcg can be NULL, go through lruvec */
+	memcgid = mem_cgroup_id(lruvec_memcg(lruvec));
+	eviction = atomic_long_read(&lruvec->nonresident_age);
 	return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page));
 }
 
@@ -246,13 +276,17 @@
  * @shadow: shadow entry of the evicted page
  *
  * Calculates and evaluates the refault distance of the previously
- * evicted page in the context of the node it was allocated in.
+ * evicted page in the context of the node and the memcg whose memory
+ * pressure caused the eviction.
  */
 void workingset_refault(struct page *page, void *shadow)
 {
+	bool file = page_is_file_lru(page);
+	struct mem_cgroup *eviction_memcg;
+	struct lruvec *eviction_lruvec;
 	unsigned long refault_distance;
+	unsigned long workingset_size;
 	struct pglist_data *pgdat;
-	unsigned long active_file;
 	struct mem_cgroup *memcg;
 	unsigned long eviction;
 	struct lruvec *lruvec;
@@ -279,49 +313,78 @@
 	 * would be better if the root_mem_cgroup existed in all
 	 * configurations instead.
 	 */
-	memcg = mem_cgroup_from_id(memcgid);
-	if (!mem_cgroup_disabled() && !memcg)
+	eviction_memcg = mem_cgroup_from_id(memcgid);
+	if (!mem_cgroup_disabled() && !eviction_memcg)
 		goto out;
-	lruvec = mem_cgroup_lruvec(pgdat, memcg);
-	refault = atomic_long_read(&lruvec->inactive_age);
-	active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES);
+	eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat);
+	refault = atomic_long_read(&eviction_lruvec->nonresident_age);
 
 	/*
 	 * Calculate the refault distance
 	 *
 	 * The unsigned subtraction here gives an accurate distance
-	 * across inactive_age overflows in most cases. There is a
+	 * across nonresident_age overflows in most cases. There is a
 	 * special case: usually, shadow entries have a short lifetime
 	 * and are either refaulted or reclaimed along with the inode
 	 * before they get too old. But it is not impossible for the
-	 * inactive_age to lap a shadow entry in the field, which can
-	 * then result in a false small refault distance, leading to a
-	 * false activation should this old entry actually refault
-	 * again. However, earlier kernels used to deactivate
+	 * nonresident_age to lap a shadow entry in the field, which
+	 * can then result in a false small refault distance, leading
+	 * to a false activation should this old entry actually
+	 * refault again. However, earlier kernels used to deactivate
 	 * unconditionally with *every* reclaim invocation for the
 	 * longest time, so the occasional inappropriate activation
 	 * leading to pressure on the active list is not a problem.
 	 */
 	refault_distance = (refault - eviction) & EVICTION_MASK;
 
-	inc_lruvec_state(lruvec, WORKINGSET_REFAULT);
+	/*
+	 * The activation decision for this page is made at the level
+	 * where the eviction occurred, as that is where the LRU order
+	 * during page reclaim is being determined.
+	 *
+	 * However, the cgroup that will own the page is the one that
+	 * is actually experiencing the refault event.
+	 */
+	memcg = page_memcg(page);
+	lruvec = mem_cgroup_lruvec(memcg, pgdat);
+
+	inc_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file);
 
 	/*
 	 * Compare the distance to the existing workingset size. We
-	 * don't act on pages that couldn't stay resident even if all
-	 * the memory was available to the page cache.
+	 * don't activate pages that couldn't stay resident even if
+	 * all the memory was available to the workingset. Whether
+	 * workingset competition needs to consider anon or not depends
+	 * on having swap.
 	 */
-	if (refault_distance > active_file)
+	workingset_size = lruvec_page_state(eviction_lruvec, NR_ACTIVE_FILE);
+	if (!file) {
+		workingset_size += lruvec_page_state(eviction_lruvec,
+						     NR_INACTIVE_FILE);
+	}
+	if (mem_cgroup_get_nr_swap_pages(memcg) > 0) {
+		workingset_size += lruvec_page_state(eviction_lruvec,
+						     NR_ACTIVE_ANON);
+		if (file) {
+			workingset_size += lruvec_page_state(eviction_lruvec,
+							     NR_INACTIVE_ANON);
+		}
+	}
+	if (refault_distance > workingset_size)
 		goto out;
 
 	SetPageActive(page);
-	atomic_long_inc(&lruvec->inactive_age);
-	inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE);
+	workingset_age_nonresident(lruvec, thp_nr_pages(page));
+	inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file);
 
 	/* Page was active prior to eviction */
 	if (workingset) {
 		SetPageWorkingset(page);
-		inc_lruvec_state(lruvec, WORKINGSET_RESTORE);
+		/* XXX: Move to lru_cache_add() when it supports new vs putback */
+		spin_lock_irq(&page_pgdat(page)->lru_lock);
+		lru_note_cost_page(page);
+		spin_unlock_irq(&page_pgdat(page)->lru_lock);
+		inc_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file);
 	}
 out:
 	rcu_read_unlock();
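
(Aside, not part of the patch: the unsigned subtraction used above for
refault_distance stays correct even after the nonresident_age counter
wraps within its truncated width; a tiny sketch with a made-up mask.)

/* Illustrative only: distance calculation across counter wraparound. */
#include <assert.h>

#define DEMO_EVICTION_MASK	((1UL << 20) - 1)

int main(void)
{
	unsigned long eviction = DEMO_EVICTION_MASK - 5;	/* packed just before wrap */
	unsigned long refault  = 10;				/* counter after wrapping */
	unsigned long distance = (refault - eviction) & DEMO_EVICTION_MASK;

	assert(distance == 16);	/* 6 increments to wrap to 0, plus 10 more */
	return 0;
}
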
@@ -347,8 +410,8 @@
 	memcg = page_memcg_rcu(page);
 	if (!mem_cgroup_disabled() && !memcg)
 		goto out;
-	lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg);
-	atomic_long_inc(&lruvec->inactive_age);
+	lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
+	workingset_age_nonresident(lruvec, thp_nr_pages(page));
 out:
 	rcu_read_unlock();
 }
@@ -367,7 +430,7 @@
 
 static struct list_lru shadow_nodes;
 
-void workingset_update_node(struct radix_tree_node *node)
+void workingset_update_node(struct xa_node *node)
 {
 	/*
 	 * Track non-empty nodes that contain only shadow entries;
@@ -377,12 +440,18 @@
 	 * already where they should be. The list_empty() test is safe
 	 * as node->private_list is protected by the i_pages lock.
 	 */
-	if (node->count && node->count == node->exceptional) {
-		if (list_empty(&node->private_list))
+	VM_WARN_ON_ONCE(!irqs_disabled());  /* For __inc_lruvec_page_state */
+
+	if (node->count && node->count == node->nr_values) {
+		if (list_empty(&node->private_list)) {
 			list_lru_add(&shadow_nodes, &node->private_list);
+			__inc_lruvec_slab_state(node, WORKINGSET_NODES);
+		}
 	} else {
-		if (!list_empty(&node->private_list))
+		if (!list_empty(&node->private_list)) {
 			list_lru_del(&shadow_nodes, &node->private_list);
+			__dec_lruvec_slab_state(node, WORKINGSET_NODES);
+		}
 	}
 }
 
@@ -391,12 +460,12 @@
 {
 	unsigned long max_nodes;
 	unsigned long nodes;
-	unsigned long cache;
+	unsigned long pages;
 
 	nodes = list_lru_shrink_count(&shadow_nodes, sc);
 
 	/*
-	 * Approximate a reasonable limit for the radix tree nodes
+	 * Approximate a reasonable limit for the nodes
 	 * containing shadow entries. We don't need to keep more
 	 * shadow entries than possible pages on the active list,
 	 * since refault distances bigger than that are dismissed.
@@ -411,20 +480,30 @@
 	 * worst-case density of 1/8th. Below that, not all eligible
 	 * refaults can be detected anymore.
 	 *
-	 * On 64-bit with 7 radix_tree_nodes per page and 64 slots
+	 * On 64-bit with 7 xa_nodes per page and 64 slots
 	 * each, this will reclaim shadow entries when they consume
 	 * ~1.8% of available memory:
 	 *
-	 * PAGE_SIZE / radix_tree_nodes / node_entries * 8 / PAGE_SIZE
+	 * PAGE_SIZE / xa_nodes / node_entries * 8 / PAGE_SIZE
 	 */
+#ifdef CONFIG_MEMCG
 	if (sc->memcg) {
-		cache = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid,
-						     LRU_ALL_FILE);
-	} else {
-		cache = node_page_state(NODE_DATA(sc->nid), NR_ACTIVE_FILE) +
-			node_page_state(NODE_DATA(sc->nid), NR_INACTIVE_FILE);
-	}
-	max_nodes = cache >> (RADIX_TREE_MAP_SHIFT - 3);
+		struct lruvec *lruvec;
+		int i;
+
+		lruvec = mem_cgroup_lruvec(sc->memcg, NODE_DATA(sc->nid));
+		for (pages = 0, i = 0; i < NR_LRU_LISTS; i++)
+			pages += lruvec_page_state_local(lruvec,
+							 NR_LRU_BASE + i);
+		pages += lruvec_page_state_local(
+			lruvec, NR_SLAB_RECLAIMABLE_B) >> PAGE_SHIFT;
+		pages += lruvec_page_state_local(
+			lruvec, NR_SLAB_UNRECLAIMABLE_B) >> PAGE_SHIFT;
+	} else
+#endif
+		pages = node_present_pages(sc->nid);
+
+	max_nodes = pages >> (XA_CHUNK_SHIFT - 3);
 
 	if (!nodes)
 		return SHRINK_EMPTY;
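
(Aside, not part of the patch: rough arithmetic behind the "~1.8%" figure
quoted in the comment above, assuming 4K pages, 64-slot xa_nodes and about
7 nodes fitting into one page.)

/* Illustrative only: memory ceiling implied by max_nodes = pages / 8. */
#include <stdio.h>

int main(void)
{
	unsigned long pages = 1UL << 20;		/* e.g. 4 GB of 4K pages */
	unsigned long max_nodes = pages >> (6 - 3);	/* XA_CHUNK_SHIFT - 3 */
	double node_bytes = 4096.0 / 7;			/* ~585 bytes per node */
	double ratio = max_nodes * node_bytes / (pages * 4096.0);

	printf("shadow node ceiling: %.1f%% of memory\n", 100.0 * ratio);
	return 0;	/* prints ~1.8% */
}
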
@@ -437,27 +516,25 @@
 static enum lru_status shadow_lru_isolate(struct list_head *item,
 					  struct list_lru_one *lru,
 					  spinlock_t *lru_lock,
-					  void *arg)
+					  void *arg) __must_hold(lru_lock)
 {
+	struct xa_node *node = container_of(item, struct xa_node, private_list);
 	struct address_space *mapping;
-	struct radix_tree_node *node;
-	unsigned int i;
 	int ret;
 
 	/*
-	 * Page cache insertions and deletions synchroneously maintain
+	 * Page cache insertions and deletions synchronously maintain
 	 * the shadow node LRU under the i_pages lock and the
 	 * lru_lock. Because the page cache tree is emptied before
 	 * the inode can be destroyed, holding the lru_lock pins any
-	 * address_space that has radix tree nodes on the LRU.
+	 * address_space that has nodes on the LRU.
 	 *
 	 * We can then safely transition to the i_pages lock to
 	 * pin only the address_space of the particular node we want
 	 * to reclaim, take the node off-LRU, and drop the lru_lock.
 	 */
 
-	node = container_of(item, struct radix_tree_node, private_list);
-	mapping = container_of(node->root, struct address_space, i_pages);
+	mapping = container_of(node->array, struct address_space, i_pages);
 
 	/* Coming from the list, invert the lock order */
 	if (!xa_trylock(&mapping->i_pages)) {
@@ -467,6 +544,8 @@
 	}
 
 	list_lru_isolate(lru, item);
+	__dec_lruvec_slab_state(node, WORKINGSET_NODES);
+
 	spin_unlock(lru_lock);
 
 	/*
@@ -474,29 +553,13 @@
 	 * no pages, so we expect to be able to remove them all and
 	 * delete and free the empty node afterwards.
 	 */
-	if (WARN_ON_ONCE(!node->exceptional))
+	if (WARN_ON_ONCE(!node->nr_values))
 		goto out_invalid;
-	if (WARN_ON_ONCE(node->count != node->exceptional))
+	if (WARN_ON_ONCE(node->count != node->nr_values))
 		goto out_invalid;
-	for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
-		if (node->slots[i]) {
-			if (WARN_ON_ONCE(!radix_tree_exceptional_entry(node->slots[i])))
-				goto out_invalid;
-			if (WARN_ON_ONCE(!node->exceptional))
-				goto out_invalid;
-			if (WARN_ON_ONCE(!mapping->nrexceptional))
-				goto out_invalid;
-			node->slots[i] = NULL;
-			node->exceptional--;
-			node->count--;
-			mapping->nrexceptional--;
-		}
-	}
-	if (WARN_ON_ONCE(node->exceptional))
-		goto out_invalid;
-	inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM);
-	__radix_tree_delete_node(&mapping->i_pages, node,
-				 workingset_lookup_update(mapping));
+	mapping->nrexceptional -= node->nr_values;
+	xa_delete_node(node, workingset_update_node);
+	__inc_lruvec_slab_state(node, WORKINGSET_NODERECLAIM);
 
 out_invalid:
 	xa_unlock_irq(&mapping->i_pages);
@@ -518,7 +581,7 @@
 static struct shrinker workingset_shadow_shrinker = {
 	.count_objects = count_shadow_nodes,
 	.scan_objects = scan_shadow_nodes,
-	.seeks = DEFAULT_SEEKS,
+	.seeks = 0, /* ->count reports only fully expendable nodes */
 	.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE,
 };
 
@@ -543,7 +606,7 @@
 	 * double the initial memory by using totalram_pages as-is.
 	 */
 	timestamp_bits = BITS_PER_LONG - EVICTION_SHIFT;
-	max_order = fls_long(totalram_pages - 1);
+	max_order = fls_long(totalram_pages() - 1);
 	if (max_order > timestamp_bits)
 		bucket_order = max_order - timestamp_bits;
 	pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n",
---|