From ca9344dbd5c31251488708fa75e322aa41ca2731 Mon Sep 17 00:00:00 2001 From: Francesco Lavra Date: Mon, 3 Feb 2025 08:35:43 +0100 Subject: [PATCH] pagecache: drain user-mapped pages when low on memory WIP --- src/kernel/flush.c | 31 ++- src/kernel/page.c | 83 ++++--- src/kernel/page.h | 8 +- src/kernel/pagecache.c | 405 +++++++++++++++++++++++++------- src/kernel/pagecache.h | 12 +- src/kernel/pagecache_internal.h | 12 +- src/kernel/schedule.c | 2 +- src/runtime/buffer.c | 9 + src/runtime/buffer.h | 2 + src/unix/exec.c | 10 +- src/unix/mmap.c | 51 ++-- src/unix/unix_internal.h | 1 + src/virtio/virtio_balloon.c | 4 +- src/x86_64/page.c | 2 +- src/x86_64/page_machine.h | 12 + 15 files changed, 474 insertions(+), 170 deletions(-) diff --git a/src/kernel/flush.c b/src/kernel/flush.c index 2b12c8ff9..1847ddd39 100644 --- a/src/kernel/flush.c +++ b/src/kernel/flush.c @@ -21,8 +21,11 @@ struct flush_entry { u64 gen; struct refcount ref; boolean flush; + volatile boolean wait; + u32 joined; u64 pages[FLUSH_THRESHOLD]; int npages; + thunk completion; closure_struct(thunk, finish); }; @@ -57,6 +60,11 @@ static void _flush_handler(void) invalidate(f->pages[i]); } } + if (f->wait) { + fetch_and_add_32(&f->joined, 1); + while (f->wait) + kern_pause(); + } refcount_release(&f->ref); } } @@ -97,6 +105,9 @@ static void service_list(void) continue; list_delete(&f->l); entries_count--; + thunk completion = f->completion; + if (completion) + async_apply(completion); assert(enqueue(free_flush_entries, f)); } } @@ -119,7 +130,7 @@ static void queue_flush_service(void) } } -void page_invalidate_sync(flush_entry f) +void page_invalidate_sync(flush_entry f, thunk completion, boolean wait) { if (initialized) { if (f->npages == 0) { @@ -151,11 +162,27 @@ void page_invalidate_sync(flush_entry f) f->gen = fetch_and_add((word *)&inval_gen, 1) + 1; spin_wunlock(&flush_lock); - send_ipi(TARGET_EXCLUSIVE_BROADCAST, flush_ipi); + f->wait = false; _flush_handler(); + f->wait = wait; + if (wait) { + f->joined = 1; + f->completion = 0; + } else { + f->completion = completion; + } + send_ipi(TARGET_EXCLUSIVE_BROADCAST, flush_ipi); irq_restore(flags); + if (wait) { + while (((volatile flush_entry)f)->joined < total_processors) + kern_pause(); + apply(completion); + f->wait = false; + } } else { flush_tlb(false); + if (completion) + async_apply(completion); } } diff --git a/src/kernel/page.c b/src/kernel/page.c index 3faff9cb4..4e136c16b 100644 --- a/src/kernel/page.c +++ b/src/kernel/page.c @@ -275,7 +275,7 @@ void update_map_flags(u64 vaddr, u64 length, pageflags flags) assert(!intersects_linear_backed(irangel(vaddr, length))); flush_entry fe = get_page_flush_entry(); traverse_ptes(vaddr, length, stack_closure(update_pte_flags, vaddr, length, flags, fe)); - page_invalidate_sync(fe); + page_invalidate_sync(fe, 0, false); #ifdef PAGE_DUMP_ALL early_debug("update_map_flags "); dump_page_tables(vaddr, length); @@ -347,7 +347,7 @@ void remap_pages(u64 vaddr_new, u64 vaddr_old, u64 length) irange(vaddr_old, vaddr_old + length)))); flush_entry fe = get_page_flush_entry(); traverse_ptes(vaddr_old, length, stack_closure(remap_entry, vaddr_new, vaddr_old, length, fe)); - page_invalidate_sync(fe); + page_invalidate_sync(fe, 0, false); #ifdef PAGE_DUMP_ALL early_debug("remap "); dump_page_tables(vaddr_new, length); @@ -376,10 +376,9 @@ void zero_mapped_pages(u64 vaddr, u64 length) /* called with lock held */ closure_function(4, 3, boolean, unmap_page, - u64, vstart, u64, len, range_handler, rh, flush_entry, fe, + u64, vstart, u64, len, 
buffer, phys_ranges, flush_entry, fe, int level, u64 vaddr, pteptr entry) { - range_handler rh = bound(rh); u64 old_entry = pte_from_pteptr(entry); if (pte_is_present(old_entry) && pte_is_mapping(level, old_entry)) { #ifdef PAGE_UPDATE_DEBUG @@ -405,21 +404,21 @@ closure_function(4, 3, boolean, unmap_page, return false; } page_invalidate(bound(fe), vaddr); - if (rh) { - apply(rh, irangel(page_from_pte(old_entry) + map_offset, unmap_len)); + buffer phys_ranges = bound(phys_ranges); + if (phys_ranges) { + range r = irangel(page_from_pte(old_entry) + map_offset, unmap_len); + return buffer_write(phys_ranges, &r, sizeof(r)); } } return true; } -/* Be warned: the page table lock is held when rh is called; don't try - to modify the page table while traversing it */ -void unmap_pages_with_handler(u64 virtual, u64 length, range_handler rh) +void unmap(u64 virtual, u64 length) { assert(!((virtual & PAGEMASK) || (length & PAGEMASK))); flush_entry fe = get_page_flush_entry(); - traverse_ptes(virtual, length, stack_closure(unmap_page, virtual, length, rh, fe)); - page_invalidate_sync(fe); + traverse_ptes(virtual, length, stack_closure(unmap_page, virtual, length, 0, fe)); + page_invalidate_sync(fe, 0, false); #ifdef PAGE_DUMP_ALL early_debug("unmap "); dump_page_tables(virtual, length); @@ -564,7 +563,7 @@ void map(u64 v, physical p, u64 length, pageflags flags) } page_init_debug("map_level done\n"); pagetable_unlock(); - page_invalidate_sync(fe); + page_invalidate_sync(fe, 0, false); #ifdef PAGE_DUMP_ALL early_debug("map "); dump_page_tables(v, length); @@ -580,29 +579,56 @@ void map_nolock(u64 v, physical p, u64 length, pageflags flags) map_level(table_ptr, PT_FIRST_LEVEL, r, &p, flags.w, 0); } -void unmap(u64 virtual, u64 length) -{ - page_init_debug("unmap v: "); - page_init_debug_u64(virtual); - page_init_debug(", length: "); - page_init_debug_u64(length); - page_init_debug("\n"); - unmap_pages(virtual, length); +#ifdef KERNEL +closure_function(2, 0, void, unmap_and_free_phys_complete, + buffer, phys_ranges, boolean, on_stack) +{ + heap h = heap_physical(get_kernel_heaps()); + buffer phys_ranges = bound(phys_ranges); + range *r; + while ((r = buffer_pop(phys_ranges, sizeof(*r)))) + deallocate(h, r->start, range_span(*r)); + if (bound(on_stack)) { + /* clear the buffer so it can be reused if there are other iterations */ + buffer_clear(phys_ranges); + } else { + deallocate_buffer(phys_ranges); + closure_finish(); + } } -closure_function(1, 1, boolean, page_dealloc, - heap, pageheap, - range r) +static void unmap_and_free_phys_sync(u64 virtual, u64 length) { - u64 virt = pagemem.pagevirt.start + r.start; - deallocate_u64(bound(pageheap), virt, range_span(r)); - return true; + kernel_context kc = (kernel_context)get_current_context(current_cpu()); + buffer phys_ranges = little_stack_buffer(kc->size / 2); + thunk completion = stack_closure(unmap_and_free_phys_complete, phys_ranges, true); + boolean done, progress; + do { + flush_entry fe = get_page_flush_entry(); + done = traverse_ptes(virtual, length, + stack_closure(unmap_page, virtual, length, phys_ranges, fe)); + progress = buffer_length(phys_ranges) != 0; + page_invalidate_sync(fe, completion, true); + } while (!done && progress); } void unmap_and_free_phys(u64 virtual, u64 length) { - unmap_pages_with_handler(virtual, length, - stack_closure(page_dealloc, (heap)heap_page_backed(get_kernel_heaps()))); + heap h = heap_locked(get_kernel_heaps()); + buffer phys_ranges = allocate_buffer(h, 64 * sizeof(range)); + if (phys_ranges == INVALID_ADDRESS) + 
return unmap_and_free_phys_sync(virtual, length); + thunk completion = closure(h, unmap_and_free_phys_complete, phys_ranges, false); + if (completion == INVALID_ADDRESS) { + deallocate_buffer(phys_ranges); + return unmap_and_free_phys_sync(virtual, length); + } + flush_entry fe = get_page_flush_entry(); + boolean success = traverse_ptes(virtual, length, + stack_closure(unmap_page, virtual, length, phys_ranges, fe)); + page_invalidate_sync(fe, completion, !success); + if (!success) + unmap_and_free_phys_sync(virtual, length); } void page_free_phys(u64 phys) @@ -610,6 +636,7 @@ void page_free_phys(u64 phys) u64 virt = pagemem.pagevirt.start + phys; deallocate_u64((heap)get_kernel_heaps()->pages, virt, PAGESIZE); } +#endif static boolean init_page_map(range phys, range *curr_virt, id_heap virt_heap, pageflags flags) { diff --git a/src/kernel/page.h b/src/kernel/page.h index 131b78752..c95845384 100644 --- a/src/kernel/page.h +++ b/src/kernel/page.h @@ -25,7 +25,7 @@ void init_page_tables(heap pageheap); void init_flush(heap); flush_entry get_page_flush_entry(); void page_invalidate(flush_entry f, u64 address); -void page_invalidate_sync(flush_entry f); +void page_invalidate_sync(flush_entry f, thunk completion, boolean wait); void page_invalidate_flush(); void invalidate(u64 page); @@ -43,12 +43,8 @@ void update_map_flags(u64 vaddr, u64 length, pageflags flags); void zero_mapped_pages(u64 vaddr, u64 length); void remap_pages(u64 vaddr_new, u64 vaddr_old, u64 length); void unmap(u64 virtual, u64 length); -void unmap_pages_with_handler(u64 virtual, u64 length, range_handler rh); -static inline void unmap_pages(u64 virtual, u64 length) -{ - unmap_pages_with_handler(virtual, length, 0); -} +#define unmap_pages(virtual, length) unmap(virtual, length) #include diff --git a/src/kernel/pagecache.c b/src/kernel/pagecache.c index 0186852a2..41365e493 100644 --- a/src/kernel/pagecache.c +++ b/src/kernel/pagecache.c @@ -24,6 +24,20 @@ queueing a ton with the polled ATA driver. There's only one queue globally anyhow. 
*/ #define MAX_PAGE_COMPLETION_VECS 16384 +typedef struct pagecache_page_entry { + union { + pteptr pte_ptr; + pte pte; + }; + pagecache_page pp; +} *pagecache_page_entry; + +typedef struct pagecache_drain_work { + struct buffer page_entries; + closure_struct(thunk, inval_complete); +} *pagecache_drain_work; + + BSS_RO_AFTER_INIT static pagecache global_pagecache; static inline u64 cache_pagesize(pagecache pc) @@ -797,6 +811,62 @@ static void pagecache_delete_pages_locked(pagecache pc) } } +closure_function(2, 3, boolean, pagecache_check_old_page, + pagecache_map, pcm, flush_entry, fe, + int level, u64 vaddr, pteptr entry) +{ + pagecache pc = global_pagecache; + pagecache_map pcm = bound(pcm); + pte old_entry = pte_from_pteptr(entry); + if (pte_is_present(old_entry) && pte_is_mapping(level, old_entry) && !pte_is_dirty(old_entry)) { + u64 pi = (pcm->node_offset + vaddr - pcm->n.r.start) >> pc->page_order; + pagecache_node pn = pcm->pn; + pagecache_lock_node(pn); + pagecache_page pp = page_lookup_nodelocked(pn, pi); + if ((pp != INVALID_ADDRESS) && (page_from_pte(old_entry) == pp->phys)) { + flush_entry fe = bound(fe); + if (pte_clear_accessed(entry)) { + page_invalidate(fe, vaddr); + pagecache_lock_state(pc); + touch_page_locked(pn, pp, 0); + pagecache_unlock_state(pc); + } else if (pp->evicted) { + page_invalidate(fe, vaddr); + } + } + pagecache_unlock_node(pn); + } + return true; +} + +static void pagecache_scan_old_maps(list head, flush_entry fe) +{ + list_foreach(head, l) { + pagecache_map pcm = struct_from_list(l, pagecache_map, l); + traverse_ptes(pcm->n.r.start, range_span(pcm->n.r), + stack_closure(pagecache_check_old_page, pcm, fe)); + } +} + +closure_func_basic(thunk, void, pagecache_inval_complete) +{ + pagecache_drain_work drain_work = struct_from_closure(pagecache_drain_work, inval_complete); + pagecache_page_entry entry; + pagecache pc = global_pagecache; + pagecache_lock_state(pc); + while ((entry = buffer_pop(&drain_work->page_entries, sizeof(*entry))) != 0) { + pteptr pte_ptr = entry->pte_ptr; + pagecache_page pp = entry->pp; + pte pt_entry = pte_from_pteptr(pte_ptr); + if (pte_is_present(pt_entry) && (page_from_pte(pt_entry) == pp->phys) && + !pte_is_dirty(pt_entry) && !pte_is_accessed(pt_entry)) { + pte_set(pte_ptr, 0); + pagecache_page_release_locked(pc, pp, false); + } + } + pagecache_unlock_state(pc); +} + u64 pagecache_drain(u64 drain_bytes) { pagecache pc = global_pagecache; @@ -814,6 +884,17 @@ u64 pagecache_drain(u64 drain_bytes) if (drained < drain_bytes) drained += cache_drain((caching_heap)pc->completions, drain_bytes - drained, PAGECACHE_COMPLETIONS_RETAIN * sizeof(struct page_completion)); + if (drained < drain_bytes) { + struct pagecache_drain_work drain_work; + init_buffer(&drain_work.page_entries, 0, false, pc->h, 0); + flush_entry fe = get_page_flush_entry(); + pagecache_scan_old_maps(&pc->shared_maps, fe); + pagecache_scan_old_maps(&pc->private_maps, fe); + page_invalidate_sync(fe, init_closure_func(&drain_work.inval_complete, thunk, + pagecache_inval_complete), + true); + buffer_set_capacity(&drain_work.page_entries, 0); + } return drained; } @@ -839,7 +920,6 @@ static void pagecache_finish_pending_writes(pagecache pc, pagecache_volume pv, p } static void pagecache_scan_shared_mappings(pagecache pc); -static void pagecache_scan_node(pagecache_node pn); closure_function(5, 1, void, pagecache_commit_complete, pagecache, pc, pagecache_page, first_page, u64, page_count, sg_list, sg, status_handler, sh, @@ -1114,8 +1194,7 @@ void 
pagecache_node_finish_pending_writes(pagecache_node pn, status_handler comp void pagecache_sync_node(pagecache_node pn, status_handler complete) { pagecache_debug("%s: pn %p, complete %p (%F)\n", func_ss, pn, complete, complete); - pagecache_scan_node(pn); - pagecache_commit_dirty_node(pn, complete); + pagecache_node_scan(pn, irange(0, infinity), complete); } closure_function(1, 1, boolean, purge_range_handler, @@ -1440,11 +1519,12 @@ closure_function(1, 3, void, pagecache_read_sg, closure_function(3, 3, boolean, pagecache_check_dirty_page, - pagecache, pc, pagecache_shared_map, sm, flush_entry, fe, + pagecache_map, pcm, flush_entry, fe, buffer, ptes, int level, u64 vaddr, pteptr entry) { - pagecache pc = bound(pc); - pagecache_shared_map sm = bound(sm); + pagecache pc = global_pagecache; + pagecache_map sm = bound(pcm); + buffer ptes = bound(ptes); pte old_entry = pte_from_pteptr(entry); if (pte_is_present(old_entry) && pte_is_mapping(level, old_entry) && @@ -1452,7 +1532,6 @@ closure_function(3, 3, boolean, pagecache_check_dirty_page, range r = irangel(sm->node_offset + (vaddr - sm->n.r.start), cache_pagesize(pc)); u64 pi = r.start >> pc->page_order; pagecache_debug(" dirty: vaddr 0x%lx, pi 0x%lx\n", vaddr, pi); - pt_pte_clean(entry); page_invalidate(bound(fe), vaddr); pagecache_node pn = sm->pn; pagecache_lock_node(pn); @@ -1464,40 +1543,55 @@ closure_function(3, 3, boolean, pagecache_check_dirty_page, pp->refcount++; } pagecache_unlock_state(pc); - pagecache_set_dirty(pn, r); + boolean success = buffer_write(ptes, &entry, sizeof(entry)); + if (success) { + success = pagecache_set_dirty(pn, r); + if (!success) + buffer_produce(ptes, -sizeof(entry)); + } pagecache_unlock_node(pn); + return success; } return true; } -static void pagecache_scan_shared_map(pagecache pc, pagecache_shared_map sm, flush_entry fe) +closure_function(1, 0, void, pagecache_clean_ptes, + buffer, ptes) { - traverse_ptes(sm->n.r.start, range_span(sm->n.r), - stack_closure(pagecache_check_dirty_page, pc, sm, fe)); + buffer ptes = bound(ptes); + pteptr *entry; + while ((entry = buffer_pop(ptes, sizeof(*entry)))) + pt_pte_clean(*entry); + + /* clear the buffer so it can be reused if there are other iterations */ + buffer_clear(ptes); } -static void pagecache_scan_shared_mappings(pagecache pc) +static boolean pagecache_scan_shared_map(pagecache_map sm, flush_entry fe, buffer ptes) { - pagecache_debug("%s\n", func_ss); - flush_entry fe = get_page_flush_entry(); - list_foreach(&pc->shared_maps, l) { - pagecache_shared_map sm = struct_from_list(l, pagecache_shared_map, l); - pagecache_debug(" shared map va %R, node_offset 0x%lx\n", sm->n.r, sm->node_offset); - pagecache_scan_shared_map(pc, sm, fe); - } - page_invalidate_sync(fe); + return traverse_ptes(sm->n.r.start, range_span(sm->n.r), + stack_closure(pagecache_check_dirty_page, sm, fe, ptes)); } -static void pagecache_scan_node(pagecache_node pn) +static void pagecache_scan_shared_mappings(pagecache pc) { pagecache_debug("%s\n", func_ss); - flush_entry fe = get_page_flush_entry(); - rangemap_foreach(pn->shared_maps, n) { - pagecache_shared_map sm = (pagecache_shared_map)n; - pagecache_debug(" shared map va %R, node_offset 0x%lx\n", n->r, sm->node_offset); - pagecache_scan_shared_map(pn->pv->pc, sm, fe); - } - page_invalidate_sync(fe); + kernel_context kc = (kernel_context)get_current_context(current_cpu()); + buffer ptes = little_stack_buffer(kc->size / 2); + thunk completion = stack_closure(pagecache_clean_ptes, ptes); + boolean done = true, progress; + do { + 
flush_entry fe = get_page_flush_entry(); + list_foreach(&pc->shared_maps, l) { + pagecache_map sm = struct_from_list(l, pagecache_map, l); + pagecache_debug(" shared map va %R, node_offset 0x%lx\n", sm->n.r, sm->node_offset); + done = pagecache_scan_shared_map(sm, fe, ptes); + if (!done) + break; + } + progress = buffer_length(ptes) != 0; + page_invalidate_sync(fe, completion, true); + } while (!done && progress); } closure_func_basic(timer_handler, void, pagecache_scan_timer, @@ -1518,28 +1612,30 @@ closure_func_basic(status_handler, void, pagecache_writeback_complete, pc->writeback_in_progress = false; } -void pagecache_node_add_shared_map(pagecache_node pn, range q /* bytes */, u64 node_offset) +void pagecache_node_add_mapping(pagecache_node pn, range q /* bytes */, u64 node_offset, + boolean shared) { pagecache pc = pn->pv->pc; - pagecache_shared_map sm = allocate(pc->h, sizeof(struct pagecache_shared_map)); - assert(sm != INVALID_ADDRESS); - sm->n.r = q; - sm->pn = pn; - sm->node_offset = node_offset; + pagecache_map pcm = allocate(pc->h, sizeof(struct pagecache_map)); + assert(pcm != INVALID_ADDRESS); + pcm->n.r = q; + pcm->pn = pn; + pcm->node_offset = node_offset; + pcm->shared = shared; pagecache_debug("%s: pn %p, q %R, node_offset 0x%lx\n", func_ss, pn, q, node_offset); pagecache_lock_state(pc); - list_insert_before(&pc->shared_maps, &sm->l); - assert(rangemap_insert(pn->shared_maps, &sm->n)); + list_insert_before(shared ? &pc->shared_maps : &pc->private_maps, &pcm->l); + assert(rangemap_insert(pn->mappings, &pcm->n)); pagecache_unlock_state(pc); } -closure_function(3, 1, boolean, close_shared_pages_intersection, - pagecache_node, pn, range, q, flush_entry, fe, +closure_function(3, 1, boolean, pagecache_node_unmap_intersection, + pagecache_node, pn, range, q, boolean *, shared_mappings, rmnode n) { pagecache_node pn = bound(pn); pagecache pc = pn->pv->pc; - pagecache_shared_map sm = (pagecache_shared_map)n; + pagecache_map pcm = (pagecache_map)n; range rn = n->r; range ri = range_intersection(bound(q), rn); boolean head = ri.start > rn.start; @@ -1548,56 +1644,62 @@ closure_function(3, 1, boolean, close_shared_pages_intersection, pagecache_debug(" intersection %R, head %d, tail %d\n", ri, head, tail); /* scan intersecting map regardless of editing */ - pagecache_scan_shared_map(pc, sm, bound(fe)); + if (pcm->shared) + *bound(shared_mappings) = true; if (!head && !tail) { - rangemap_remove_node(pn->shared_maps, n); - list_delete(&sm->l); - deallocate(pc->h, sm, sizeof(struct pagecache_shared_map)); + rangemap_remove_node(pn->mappings, n); + list_delete(&pcm->l); + deallocate(pc->h, pcm, sizeof(struct pagecache_map)); } else if (head) { /* truncate map at start */ - assert(rangemap_reinsert(pn->shared_maps, n, irange(rn.start, ri.start))); + assert(rangemap_reinsert(pn->mappings, n, irange(rn.start, ri.start))); if (tail) { /* create map at tail end */ - pagecache_node_add_shared_map(pn, irange(ri.end, rn.end), - sm->node_offset + (ri.end - rn.start)); + pagecache_node_add_mapping(pn, irange(ri.end, rn.end), + pcm->node_offset + (ri.end - rn.start), pcm->shared); } } else { /* tail only: move map start back */ - assert(rangemap_reinsert(pn->shared_maps, n, irange(ri.end, rn.end))); - sm->node_offset += ri.end - rn.start; + assert(rangemap_reinsert(pn->mappings, n, irange(ri.end, rn.end))); + pcm->node_offset += ri.end - rn.start; } return true; } -void pagecache_node_close_shared_pages(pagecache_node pn, range q /* bytes */, flush_entry fe) -{ - pagecache_debug("%s: node %p, q 
%R\n", func_ss, pn, q); - rangemap_range_lookup(pn->shared_maps, q, - stack_closure(close_shared_pages_intersection, pn, q, fe)); -} - closure_function(2, 1, boolean, scan_shared_pages_intersection, - pagecache, pc, flush_entry, fe, + flush_entry, fe, buffer, ptes, rmnode n) { /* currently just scanning the whole map - it could be just a range, but with scan and sync timers imminent, does it really matter? */ - pagecache_shared_map sm = (pagecache_shared_map)n; - pagecache_debug(" map %p\n", sm); - pagecache_scan_shared_map(bound(pc), sm, bound(fe)); + pagecache_map pcm = (pagecache_map)n; + if (pcm->shared) { + pagecache_debug(" map %p\n", pcm); + return pagecache_scan_shared_map(pcm, bound(fe), bound(ptes)); + } return true; } -void pagecache_node_scan_and_commit_shared_pages(pagecache_node pn, range q /* bytes */) +void pagecache_node_scan(pagecache_node pn, range q /* bytes */, status_handler complete) { pagecache_debug("%s: node %p, q %R\n", func_ss, pn, q); - flush_entry fe = get_page_flush_entry(); - rangemap_range_lookup(pn->shared_maps, q, - stack_closure(scan_shared_pages_intersection, pn->pv->pc, fe)); - pagecache_commit_dirty_node(pn, 0); - page_invalidate_sync(fe); + kernel_context kc = (kernel_context)get_current_context(current_cpu()); + buffer ptes = little_stack_buffer(kc->size / 2); + thunk completion = stack_closure(pagecache_clean_ptes, ptes); + boolean done = true, progress; + do { + flush_entry fe = get_page_flush_entry(); + done = (rangemap_range_lookup(pn->mappings, q, + stack_closure(scan_shared_pages_intersection, fe, ptes)) != + RM_ABORT); + progress = buffer_length(ptes) != 0; + page_invalidate_sync(fe, completion, true); + } while (!done && progress); + if (!done && complete) + return apply(complete, timm_oom); + pagecache_commit_dirty_node(pn, complete); } boolean pagecache_node_do_page_cow(pagecache_node pn, u64 node_offset, u64 vaddr, pageflags flags) @@ -1653,13 +1755,36 @@ void pagecache_node_fetch_pages(pagecache_node pn, range r, sg_list sg, status_h pagecache_node_fetch_internal(pn, r, ph, complete ? complete : ignore_status); } -closure_function(2, 1, void, get_page_finish, - pagecache_page, pp, pagecache_page_handler, handler, +static void *pagecache_get_private_page(pagecache_page pp) +{ + void *kvirt; + pagecache pc = global_pagecache; + pagecache_lock_state(pc); + if ((pp->refcount == 1) && !pp->evicted) { + pp->refcount = 0; + change_page_state_locked(pc, pp, PAGECACHE_PAGESTATE_FREE); + fetch_and_add(&pc->total_pages, -1); + pagecache_page_delete_locked(pc, pp); + kvirt = pp->kvirt; + } else { + u64 pagesize = cache_pagesize(pc); + kvirt = allocate(pc->contiguous, pagesize); + if (kvirt != INVALID_ADDRESS) + runtime_memcpy(kvirt, pp->kvirt, pagesize); + } + pagecache_unlock_state(pc); + return kvirt; +} + +closure_function(3, 1, void, get_page_finish, + pagecache_page, pp, boolean, private, pagecache_page_handler, handler, status s) { pagecache_page_handler handler = bound(handler); if (is_ok(s)) { - apply(handler, bound(pp)->kvirt); + pagecache_page pp = bound(pp); + void *kvirt = !bound(private) ? 
pp->kvirt : pagecache_get_private_page(pp); + apply(handler, kvirt); } else { apply(handler, INVALID_ADDRESS); } @@ -1667,7 +1792,8 @@ closure_function(2, 1, void, get_page_finish, } /* not context restoring */ -void pagecache_get_page(pagecache_node pn, u64 node_offset, pagecache_page_handler handler) +void pagecache_get_page(pagecache_node pn, u64 node_offset, boolean private, + pagecache_page_handler handler) { pagecache pc = pn->pv->pc; pagecache_lock_node(pn); @@ -1680,7 +1806,7 @@ void pagecache_get_page(pagecache_node pn, u64 node_offset, pagecache_page_handl apply(handler, INVALID_ADDRESS); return; } - merge m = allocate_merge(pc->h, closure(pc->h, get_page_finish, pp, handler)); + merge m = allocate_merge(pc->h, closure(pc->h, get_page_finish, pp, private, handler)); status_handler k = apply_merge(m); touch_or_fill_page_nodelocked(pn, pp, m); pagecache_unlock_node(pn); @@ -1688,7 +1814,7 @@ void pagecache_get_page(pagecache_node pn, u64 node_offset, pagecache_page_handl } /* no-alloc / no-fill path */ -void *pagecache_get_page_if_filled(pagecache_node pn, u64 node_offset) +void *pagecache_get_page_if_filled(pagecache_node pn, u64 node_offset, boolean private) { pagecache_lock_node(pn); pagecache_page pp = page_lookup_nodelocked(pn, node_offset >> pn->pv->pc->page_order); @@ -1699,7 +1825,7 @@ void *pagecache_get_page_if_filled(pagecache_node pn, u64 node_offset) goto out; } if (touch_or_fill_page_nodelocked(pn, pp, 0)) - kvirt = pp->kvirt; + kvirt = !private ? pp->kvirt : pagecache_get_private_page(pp); else kvirt = INVALID_ADDRESS; out: @@ -1721,8 +1847,8 @@ void pagecache_release_page(pagecache_node pn, u64 node_offset) pagecache_unlock_node(pn); } -closure_function(4, 3, boolean, pagecache_unmap_page_nodelocked, - pagecache_node, pn, u64, vaddr_base, u64, node_offset, flush_entry, fe, +closure_function(6, 3, boolean, pagecache_unmap_page_nodelocked, + pagecache_node, pn, u64, vaddr_base, u64, node_offset, flush_entry, fe, boolean, do_unmap, buffer, unmap_entries, int level, u64 vaddr, pteptr entry) { pte old_entry = pte_from_pteptr(entry); @@ -1730,36 +1856,132 @@ closure_function(4, 3, boolean, pagecache_unmap_page_nodelocked, pte_is_mapping(level, old_entry)) { u64 pi = (bound(node_offset) + (vaddr - bound(vaddr_base))) >> PAGELOG; pagecache_debug(" vaddr 0x%lx, pi 0x%lx\n", vaddr, pi); - pte_set(entry, 0); page_invalidate(bound(fe), vaddr); pagecache_page pp = page_lookup_nodelocked(bound(pn), pi); assert(pp != INVALID_ADDRESS); - u64 phys = page_from_pte(old_entry); - pagecache pc = bound(pn)->pv->pc; - if (phys == pp->phys) { - /* shared or cow */ - assert(pp->refcount >= 1); + struct pagecache_page_entry e; + if (bound(do_unmap)) { + e.pte = pte_from_pteptr(entry); + pte_set(entry, 0); + } else { + e.pte_ptr = entry; + } + e.pp = pp; + if (!buffer_write(bound(unmap_entries), &e, sizeof(e))) + return false; + } + return true; +} + +closure_function(4, 0, void, pagecache_node_unmap_pages_complete, + buffer, unmap_entries, boolean, shared_mappings, boolean, on_stack, bytes *, remaining) +{ + buffer unmap_entries = bound(unmap_entries); + boolean check_dirty = bound(shared_mappings); + pagecache_page_entry e; + while ((e = buffer_pop(unmap_entries, sizeof(*e)))) { + pteptr pte_ptr; + pte old_pte; + if (check_dirty) { + pte_ptr = e->pte_ptr; + old_pte = pte_from_pteptr(pte_ptr); + } else { + old_pte = e->pte; + } + pagecache pc = global_pagecache; + pagecache_page pp = e->pp; + if (check_dirty && pte_is_dirty(old_pte)) { + pagecache_node pn = pp->node; + 
pagecache_lock_node(pn); + boolean success = pagecache_set_dirty(pn, range_lshift(irangel(page_offset(pp), 1), + pc->page_order)); + if (success) { + pagecache_lock_state(pc); + if (page_state(pp) != PAGECACHE_PAGESTATE_DIRTY) { + change_page_state_locked(pc, pp, PAGECACHE_PAGESTATE_DIRTY); + pp->refcount++; + } + pagecache_unlock_state(pc); + } + pagecache_unlock_node(pn); + if (!success) + break; + } + if (check_dirty) + pte_set(pte_ptr, 0); + u64 phys = page_from_pte(old_pte); + if (phys == pp->phys) { /* shared or CoW */ pagecache_lock_state(pc); pagecache_page_release_locked(pc, pp, false); pagecache_unlock_state(pc); - } else { - /* private copy: free physical page */ + } else { /* private copy */ page_free_phys(phys); } } - return true; + bytes *remaining = bound(remaining); + if (remaining) + *remaining = buffer_length(unmap_entries); + if (bound(on_stack)) { + /* clear the buffer so it can be reused if there are other iterations */ + buffer_clear(unmap_entries); + } else { + deallocate_buffer(unmap_entries); + closure_finish(); + } +} + +static void pagecache_node_unmap_pages_sync(pagecache_node pn, range v, u64 node_offset, + boolean shared_mappings) +{ + kernel_context kc = (kernel_context)get_current_context(current_cpu()); + buffer unmap_entries = little_stack_buffer(kc->size / 2); + bytes remaining; + thunk completion = stack_closure(pagecache_node_unmap_pages_complete, unmap_entries, + shared_mappings, true, &remaining); + boolean done, progress; + do { + flush_entry fe = get_page_flush_entry(); + pagecache_lock_node(pn); + done = traverse_ptes(v.start, range_span(v), + stack_closure(pagecache_unmap_page_nodelocked, pn, v.start, + node_offset, fe, !shared_mappings, unmap_entries)); + pagecache_unlock_node(pn); + bytes queued = buffer_length(unmap_entries); + page_invalidate_sync(fe, completion, true); + if (done && remaining) + done = false; + progress = (remaining < queued); + } while (!done && progress); } void pagecache_node_unmap_pages(pagecache_node pn, range v /* bytes */, u64 node_offset) { pagecache_debug("%s: pn %p, v %R, node_offset 0x%lx\n", func_ss, pn, v, node_offset); + boolean shared_mappings = false; + rangemap_range_lookup(pn->mappings, v, + stack_closure(pagecache_node_unmap_intersection, pn, v, + &shared_mappings)); + if (shared_mappings) + return pagecache_node_unmap_pages_sync(pn, v, node_offset, true); + heap h = global_pagecache->h; + buffer unmap_entries = allocate_buffer(h, 512); + if (unmap_entries == INVALID_ADDRESS) + return pagecache_node_unmap_pages_sync(pn, v, node_offset, false); + thunk completion = closure(h, pagecache_node_unmap_pages_complete, unmap_entries, false, false, + 0); + if (completion == INVALID_ADDRESS) { + deallocate_buffer(unmap_entries); + return pagecache_node_unmap_pages_sync(pn, v, node_offset, false); + } flush_entry fe = get_page_flush_entry(); - pagecache_node_close_shared_pages(pn, v, fe); pagecache_lock_node(pn); - traverse_ptes(v.start, range_span(v), stack_closure(pagecache_unmap_page_nodelocked, pn, - v.start, node_offset, fe)); + boolean success = traverse_ptes(v.start, range_span(v), + stack_closure(pagecache_unmap_page_nodelocked, pn, v.start, + node_offset, fe, true, unmap_entries)); pagecache_unlock_node(pn); - page_invalidate_sync(fe); + page_invalidate_sync(fe, completion, !success); + if (!success) + pagecache_node_unmap_pages_sync(pn, v, node_offset, false); } closure_func_basic(rbnode_handler, boolean, pagecache_page_print_key, @@ -1808,7 +2030,7 @@ closure_function(1, 1, boolean, pagecache_page_release, 
closure_func_basic(rmnode_handler, boolean, pagecache_node_assert, rmnode n) { - /* A pagecache node being deallocated must not have any shared maps. */ + /* A pagecache node being deallocated must not have any mappings. */ assert(0); return false; } @@ -1826,7 +2048,7 @@ closure_func_basic(thunk, void, pagecache_node_free) deallocate_closure(pn->cache_write); pagecache pc = pn->pv->pc; destruct_rbtree(&pn->pages, stack_closure(pagecache_page_release, pc)); - deallocate_rangemap(pn->shared_maps, stack_closure_func(rmnode_handler, pagecache_node_assert)); + deallocate_rangemap(pn->mappings, stack_closure_func(rmnode_handler, pagecache_node_assert)); deallocate(pc->h, pn, sizeof(*pn)); } @@ -1860,8 +2082,8 @@ pagecache_node pagecache_allocate_node(pagecache_volume pv, sg_io fs_read, sg_io if (pn == INVALID_ADDRESS) return pn; pn->pv = pv; - pn->shared_maps = allocate_rangemap(h); - if (pn->shared_maps == INVALID_ADDRESS) { + pn->mappings = allocate_rangemap(h); + if (pn->mappings == INVALID_ADDRESS) { deallocate(h, pn, sizeof(struct pagecache_node)); return INVALID_ADDRESS; } @@ -1962,6 +2184,7 @@ void init_pagecache(heap general, heap contiguous, u64 pagesize) page_list_init(&pc->writing); list_init(&pc->volumes); list_init(&pc->shared_maps); + list_init(&pc->private_maps); init_closure_func(&pc->page_compare, rb_key_compare, pagecache_page_compare); init_closure_func(&pc->page_print_key, rbnode_handler, pagecache_page_print_key); diff --git a/src/kernel/pagecache.h b/src/kernel/pagecache.h index 232d2b0ee..9277af280 100644 --- a/src/kernel/pagecache.h +++ b/src/kernel/pagecache.h @@ -38,19 +38,19 @@ sg_io pagecache_node_get_reader(pagecache_node pn); sg_io pagecache_node_get_writer(pagecache_node pn); -void pagecache_node_add_shared_map(pagecache_node pn , range v /* bytes */, u64 node_offset); +void pagecache_node_add_mapping(pagecache_node pn , range v /* bytes */, u64 node_offset, + boolean shared); -void pagecache_node_close_shared_pages(pagecache_node pn, range q /* bytes */, flush_entry fe); - -void pagecache_node_scan_and_commit_shared_pages(pagecache_node pn, range q /* bytes */); +void pagecache_node_scan(pagecache_node pn, range q /* bytes */, status_handler complete); boolean pagecache_node_do_page_cow(pagecache_node pn, u64 node_offset, u64 vaddr, pageflags flags); void pagecache_node_fetch_pages(pagecache_node pn, range r /* bytes */, sg_list sg, status_handler complete); -void pagecache_get_page(pagecache_node pn, u64 node_offset, pagecache_page_handler handler); -void *pagecache_get_page_if_filled(pagecache_node pn, u64 node_offset); +void pagecache_get_page(pagecache_node pn, u64 node_offset, boolean private, + pagecache_page_handler handler); +void *pagecache_get_page_if_filled(pagecache_node pn, u64 node_offset, boolean private); void pagecache_release_page(pagecache_node pn, u64 node_offset); void pagecache_node_unmap_pages(pagecache_node pn, range v /* bytes */, u64 node_offset); diff --git a/src/kernel/pagecache_internal.h b/src/kernel/pagecache_internal.h index 82f487c06..7b0685abc 100644 --- a/src/kernel/pagecache_internal.h +++ b/src/kernel/pagecache_internal.h @@ -33,6 +33,7 @@ typedef struct pagecache { struct pagelist writing; struct list volumes; struct list shared_maps; + struct list private_maps; boolean writeback_in_progress; struct timer scan_timer; @@ -61,7 +62,7 @@ typedef struct pagecache_node { struct spinlock pages_lock; #endif struct rbtree pages; - rangemap shared_maps; /* shared mappings associated with this node */ + rangemap mappings; /* shared and 
private mappings associated with this node */ struct rangemap dirty; struct list ops; u64 length; @@ -97,12 +98,13 @@ struct pagecache_node_op_complete { status_handler sh; }; -typedef struct pagecache_shared_map { - struct rmnode n; /* pn->shared */ - struct list l; /* pc->shared_maps */ +typedef struct pagecache_map { + struct rmnode n; /* pn->mappings */ + struct list l; /* pc->shared_maps or pc->private_maps */ pagecache_node pn; u64 node_offset; /* file offset of va.start */ -} *pagecache_shared_map; + boolean shared; +} *pagecache_map; #define PAGECACHE_PAGESTATE_SHIFT 61 diff --git a/src/kernel/schedule.c b/src/kernel/schedule.c index 96feb3d49..0999e3d9d 100644 --- a/src/kernel/schedule.c +++ b/src/kernel/schedule.c @@ -203,7 +203,7 @@ NOTRACE void __attribute__((noreturn)) runloop_internal(void) service_thunk_queue(runqueue); /* should be a list of per-runloop checks - also low-pri background */ - mm_service(false); +// mm_service(false); timestamp here = now(CLOCK_ID_MONOTONIC_RAW); timestamp next_timeout = update_timer(here); diff --git a/src/runtime/buffer.c b/src/runtime/buffer.c index 3981f15f5..e5fbd9d01 100644 --- a/src/runtime/buffer.c +++ b/src/runtime/buffer.c @@ -51,6 +51,15 @@ boolean buffer_append(buffer b, return buffer_write(b, body, length); } +void *buffer_pop(buffer b, bytes len) +{ + if (buffer_length(b) < len) + return 0; + void *ptr = buffer_ref(b, 0); + buffer_consume(b, len); + return ptr; +} + /* The string in the buffer may or may not be null-terminated. */ int buffer_compare_with_sstring(buffer b, sstring str) { diff --git a/src/runtime/buffer.h b/src/runtime/buffer.h index 85dee3b0c..b7b34c077 100644 --- a/src/runtime/buffer.h +++ b/src/runtime/buffer.h @@ -280,6 +280,8 @@ static inline boolean buffer_write_byte(buffer b, u8 x) return true; } +void *buffer_pop(buffer b, bytes len); + static inline buffer sub_buffer(heap h, buffer b, bytes start, diff --git a/src/unix/exec.c b/src/unix/exec.c index 2fbd77e37..8759f0abe 100644 --- a/src/unix/exec.c +++ b/src/unix/exec.c @@ -279,7 +279,8 @@ closure_function(4, 5, boolean, faulting_map, u64 vmflags = VMAP_FLAG_READABLE | VMAP_FLAG_PROG; if (pageflags_is_exec(flags)) vmflags |= VMAP_FLAG_EXEC; - if (pageflags_is_writable(flags)) + boolean rw = pageflags_is_writable(flags); + if (rw) vmflags |= VMAP_FLAG_WRITABLE; if (tail_bss > 0) vmflags |= VMAP_FLAG_TAIL_BSS; @@ -287,12 +288,17 @@ closure_function(4, 5, boolean, faulting_map, exec_debug("%s: add %s to vmap: %R vmflags 0x%lx, offset 0x%lx, data_size 0x%lx, tail_bss 0x%lx\n", func_ss, pageflags_is_exec(flags) ? ss("text") : ss("data"), r, vmflags, offset, data_size, tail_bss); + pagecache_node pn = fsfile_get_cachenode(bound(f)); struct vmap k = ivmap(vmflags, bound(allowed_flags), offset, - fsfile_get_cachenode(bound(f)), 0); + pn, 0); if (tail_bss > 0) k.bss_offset = data_size; if (allocate_vmap(bound(p), r, k) == INVALID_ADDRESS) goto alloc_fail; + if (!rw) + /* Make the page cache aware of this mapping so that it can evict relevant pages on + * memory pressure. 
*/ + pagecache_node_add_mapping(pn, r, offset, false); map_start += data_map_size; bss_size -= tail_bss; } diff --git a/src/unix/mmap.c b/src/unix/mmap.c index ad2465e6d..84556a2c5 100644 --- a/src/unix/mmap.c +++ b/src/unix/mmap.c @@ -230,30 +230,25 @@ static status demand_anonymous_page(process p, context ctx, u64 vaddr, vmap vm, return STATUS_OK; } -static status mmap_filebacked_page(vmap vm, u64 page_addr, pageflags flags, void *kvirt) +static status mmap_filebacked_page(vmap vm, u64 page_addr, pageflags flags, void *kvirt, + boolean private_page) { u64 vmap_offset = page_addr - vm->node.r.start; boolean pagecache_map; - status s; pagetable_lock(); u64 p = __physical_from_virtual_locked(pointer_from_u64(page_addr)); if (p == INVALID_PHYSICAL) { - pagecache_map = true; p = physical_from_virtual(kvirt); - if (vm->flags & VMAP_FLAG_TAIL_BSS) { - u64 bss_offset = vm->bss_offset; - if (point_in_range(irangel(vmap_offset, PAGESIZE), bss_offset)) { - pagecache_map = false; - void *new_page = allocate(mmap_info.virtual_backed, PAGESIZE); - if (new_page == INVALID_ADDRESS) { - vmap_debug("%s: cannot get physical page\n", func_ss); - s = timm_oom; - goto out; + if (!private_page) { + pagecache_map = true; + } else { + pagecache_map = false; + if (vm->flags & VMAP_FLAG_TAIL_BSS) { + u64 bss_offset = vm->bss_offset; + if (point_in_range(irangel(vmap_offset, PAGESIZE), bss_offset)) { + u64 bss_start = bss_offset - vmap_offset; + zero(kvirt + bss_start, PAGESIZE - bss_start); } - u64 bss_start = bss_offset - vmap_offset; - runtime_memcpy(new_page, kvirt, bss_start); - zero(new_page + bss_start, PAGESIZE - bss_start); - p = physical_from_virtual(new_page); } } map_nolock(page_addr, p, PAGESIZE, flags); @@ -261,12 +256,10 @@ static status mmap_filebacked_page(vmap vm, u64 page_addr, pageflags flags, void /* The mapping must have been done in parallel by another CPU. 
*/ pagecache_map = false; } - s = STATUS_OK; - out: pagetable_unlock(); if (!pagecache_map) pagecache_release_page(vm->cache_node, vm->node_offset + vmap_offset); - return s; + return STATUS_OK; } closure_func_basic(pagecache_page_handler, void, pending_fault_page_handler, @@ -284,7 +277,8 @@ closure_func_basic(thunk, void, pending_fault_filebacked) pagecache_page_handler h = init_closure_func(&pf->filebacked.demand_file_page, pagecache_page_handler, pending_fault_page_handler); - pagecache_get_page(pf->filebacked.pn, pf->filebacked.node_offset, h); + pagecache_get_page(pf->filebacked.pn, pf->filebacked.node_offset, pf->filebacked.private_page, + h); } static status demand_filebacked_page(process p, context ctx, u64 vaddr, vmap vm, pending_fault *pf) @@ -295,7 +289,8 @@ static status demand_filebacked_page(process p, context ctx, u64 vaddr, vmap vm, pagecache_node pn = vm->cache_node; u64 node_offset = vm->node_offset + vmap_offset; boolean shared = (vm->flags & VMAP_FLAG_SHARED) != 0; - if (!shared && !(vm->flags & VMAP_FLAG_PROG)) + boolean private_page = (vm->flags & VMAP_FLAG_PROG) && (vm->flags & VMAP_FLAG_WRITABLE); + if (!shared && !private_page) flags = pageflags_readonly(flags); /* cow */ pf_debug(" node %p (start 0x%lx), offset 0x%lx, vm flags 0x%lx, pageflags 0x%lx\n", @@ -311,9 +306,9 @@ static status demand_filebacked_page(process p, context ctx, u64 vaddr, vmap vm, void *kvirt; status s; if (!*pf) { - kvirt = pagecache_get_page_if_filled(pn, node_offset); + kvirt = pagecache_get_page_if_filled(pn, node_offset, private_page); if (kvirt != INVALID_ADDRESS) - return mmap_filebacked_page(vm, page_addr, flags, kvirt); + return mmap_filebacked_page(vm, page_addr, flags, kvirt, private_page); pending_fault new_pf = new_pending_fault_locked(p, ctx, vaddr); if (new_pf != INVALID_ADDRESS) { pagecache_node_ref(pn); @@ -321,6 +316,7 @@ static status demand_filebacked_page(process p, context ctx, u64 vaddr, vmap vm, new_pf->filebacked.pn = pn; new_pf->filebacked.node_offset = node_offset; init_closure_func(&new_pf->async_handler, thunk, pending_fault_filebacked); + new_pf->filebacked.private_page = private_page; } *pf = new_pf; return STATUS_OK; @@ -331,7 +327,7 @@ static status demand_filebacked_page(process p, context ctx, u64 vaddr, vmap vm, if (kvirt == INVALID_ADDRESS) s = timm_oom; else - s = mmap_filebacked_page(vm, page_addr, flags, kvirt); + s = mmap_filebacked_page(vm, page_addr, flags, kvirt, private_page); return s; } @@ -1196,7 +1192,7 @@ closure_func_basic(vmap_handler, boolean, msync_vmap, (vm->flags & VMAP_FLAG_MMAP) && (vm->flags & VMAP_MMAP_TYPE_MASK) == VMAP_MMAP_TYPE_FILEBACKED) { vmap_assert(vm->cache_node); - pagecache_node_scan_and_commit_shared_pages(vm->cache_node, vm->node.r); + pagecache_node_scan(vm->cache_node, vm->node.r, 0); } return true; } @@ -1397,8 +1393,9 @@ static sysreturn mmap(void *addr, u64 length, int prot, int flags, int fd, u64 o vm->fault = k.fault; vmap_unlock(p); - if (vmap_mmap_type == VMAP_MMAP_TYPE_FILEBACKED && (vmflags & VMAP_FLAG_SHARED)) - pagecache_node_add_shared_map(node, irangel(q.start, len), offset); + if (vmap_mmap_type == VMAP_MMAP_TYPE_FILEBACKED) + pagecache_node_add_mapping(node, irangel(q.start, len), offset, + !!(vmflags & VMAP_FLAG_SHARED)); /* as man page suggests, ignore MAP_POPULATE if MAP_NONBLOCK is specified */ if ((flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE && (prot & PROT_READ)) { diff --git a/src/unix/unix_internal.h b/src/unix/unix_internal.h index 7baf31fdd..fc978caa2 100644 --- 
a/src/unix/unix_internal.h +++ b/src/unix/unix_internal.h @@ -277,6 +277,7 @@ typedef struct pending_fault { u64 node_offset; closure_struct(pagecache_page_handler, demand_file_page); void *page_kvirt; + boolean private_page; } filebacked; void *custom; }; diff --git a/src/virtio/virtio_balloon.c b/src/virtio/virtio_balloon.c index eaec03fb5..5cf08d895 100644 --- a/src/virtio/virtio_balloon.c +++ b/src/virtio/virtio_balloon.c @@ -174,6 +174,8 @@ static u64 virtio_balloon_inflate(u64 n_balloon_pages) vqmsg_commit(vq, m, c); inflated++; } + if (inflated < n_balloon_pages) + mm_service(false); return inflated; } @@ -390,7 +392,7 @@ static boolean virtio_balloon_attach(heap general, backed_heap backed, heap phys virtio_balloon_update(); mem_cleaner bd = closure_func(general, mem_cleaner, virtio_balloon_deflater); assert(bd != INVALID_ADDRESS); - if (!mm_register_mem_cleaner(bd)) + if (0 && !mm_register_mem_cleaner(bd)) deallocate_closure(bd); if (balloon_has_stats_vq()) virtio_balloon_init_statsq(); diff --git a/src/x86_64/page.c b/src/x86_64/page.c index 13a4e2993..aa9ba4862 100644 --- a/src/x86_64/page.c +++ b/src/x86_64/page.c @@ -26,7 +26,7 @@ void page_invalidate(flush_entry f, u64 address) flush_tlb(true); } -void page_invalidate_sync(flush_entry f) +void page_invalidate_sync(flush_entry f, thunk completion, boolean wait) { } diff --git a/src/x86_64/page_machine.h b/src/x86_64/page_machine.h index f18cf6d14..5b428b786 100644 --- a/src/x86_64/page_machine.h +++ b/src/x86_64/page_machine.h @@ -223,6 +223,18 @@ static inline boolean pte_is_dirty(pte entry) return (entry & PAGE_DIRTY) != 0; } +static inline boolean pte_is_accessed(pte entry) +{ + return (entry & PAGE_ACCESSED) != 0; +} + +static inline boolean pte_clear_accessed(pteptr pp) +{ + boolean accessed = !!(*pp & PAGE_ACCESSED); + *pp &= ~PAGE_ACCESSED; + return accessed; +} + static inline u64 page_from_pte(pte p) { /* page directory pointer base address [51:12] */
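
Illustrative sketch (not part of the diff above): page_invalidate_sync() now takes a completion thunk and a wait flag. With wait false, the completion is applied asynchronously once every CPU has processed the shootdown IPI; with wait true, the caller spins until all CPUs have joined and the completion runs on the calling CPU before the function returns, which is what makes stack-allocated completions and work buffers safe to reuse. A minimal caller-side sketch, assuming the PTE for vaddr was already modified by the caller (the helper name is hypothetical):

static void flush_one_page_sync(u64 vaddr, thunk completion)
{
    flush_entry fe = get_page_flush_entry();
    page_invalidate(fe, vaddr);                  /* queue the address for shootdown */
    page_invalidate_sync(fe, completion, true);  /* IPI all CPUs, wait for them to join,
                                                    then run 'completion' on this CPU */
}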
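
Illustrative sketch (not part of the diff above): buffer_pop(), added in src/runtime/buffer.c, is the consumer half of the fixed-size record queues used throughout this change: buffer_write() appends a record, buffer_pop() returns a pointer to the oldest record and consumes it, or 0 when fewer than len bytes remain. The variable names below (phys, unmap_len, phys_heap) are placeholders; the pattern is the one used by unmap_page() and unmap_and_free_phys_complete():

/* producer side, during PTE traversal */
range r = irangel(phys, unmap_len);
if (!buffer_write(phys_ranges, &r, sizeof(r)))
    return false;                         /* buffer full: abort traversal, drain, retry */

/* consumer side, in the flush completion */
range *qr;
while ((qr = buffer_pop(phys_ranges, sizeof(*qr))) != 0)
    deallocate(phys_heap, qr->start, range_span(*qr));
buffer_clear(phys_ranges);                /* ready for reuse on the next pass */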
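
Illustrative sketch (not part of the diff above): unmap_and_free_phys_sync(), pagecache_scan_shared_mappings(), pagecache_node_scan() and pagecache_node_unmap_pages_sync() all share one control flow: traverse PTEs while queueing per-page work into a bounded stack buffer, issue a synchronous shootdown whose completion drains the buffer, and repeat until the traversal completes or a pass makes no progress. An annotated restatement of that loop, using names from the patch (the function name itself is hypothetical):

static void traverse_flush_drain(u64 vaddr, u64 length)
{
    kernel_context kc = (kernel_context)get_current_context(current_cpu());
    buffer work = little_stack_buffer(kc->size / 2);       /* bounded work queue on this stack */
    thunk completion = stack_closure(unmap_and_free_phys_complete, work, true);
    boolean done, progress;
    do {
        flush_entry fe = get_page_flush_entry();
        /* the traversal aborts (returns false) when 'work' fills up */
        done = traverse_ptes(vaddr, length,
                             stack_closure(unmap_page, vaddr, length, work, fe));
        progress = buffer_length(work) != 0;                /* did this pass queue anything? */
        page_invalidate_sync(fe, completion, true);         /* shoot down TLBs, then drain 'work' */
    } while (!done && progress);                            /* stop when finished or stuck */
}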
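
Illustrative sketch (not part of the diff above): the new pte_is_accessed()/pte_clear_accessed() helpers let the drain path approximate page age: a PTE whose accessed bit is still clear on a later scan has not been referenced since the bit was last cleared (clearing the bit must be followed by a TLB shootdown so the next access sets it again). A simplified version of the test made by pagecache_check_old_page() and pagecache_inval_complete(); the real code also matches the PTE against pp->phys and checks the cache page state:

static boolean pte_is_cold(pteptr entry)
{
    if (pte_clear_accessed(entry))          /* referenced since the last scan: keep it,
                                               but clear the bit for the next scan */
        return false;
    return !pte_is_dirty(pte_from_pteptr(entry));   /* cold and clean: reclaim candidate */
}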
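
Illustrative note (not part of the diff above): the pte_ptr/pte union in struct pagecache_page_entry reflects the two unmap flavors in pagecache_unmap_page_nodelocked(). With do_unmap true (no shared mappings) the PTE is cleared during traversal and only its old value is queued; with do_unmap false the pointer is queued so the completion can harvest the dirty bit before clearing the PTE. Restated with comments:

struct pagecache_page_entry {
    union {
        pteptr pte_ptr;   /* do_unmap false: PTE still live; dirty bit is harvested and the
                             PTE cleared later in pagecache_node_unmap_pages_complete() */
        pte pte;          /* do_unmap true: PTE already cleared; this is its old value,
                             used to free a private physical copy if one was made */
    };
    pagecache_page pp;    /* cache page the mapping referred to */
};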