Index: machine/pc32/i386/pmap.c =================================================================== RCS file: /cvs/src/sys/machine/pc32/i386/pmap.c,v retrieving revision 1.61 diff -u -r1.61 pmap.c --- machine/pc32/i386/pmap.c 7 Nov 2006 06:43:24 -0000 1.61 +++ machine/pc32/i386/pmap.c 21 Nov 2006 00:04:18 -0000 @@ -1279,7 +1279,7 @@ /*************************************************** -* Pmap allocation/deallocation routines. + * Pmap allocation/deallocation routines. ***************************************************/ /* @@ -1287,43 +1287,59 @@ * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. */ +static int pmap_release_callback(struct vm_page *p, void *data); + void pmap_release(struct pmap *pmap) { - vm_page_t p,n,ptdpg; vm_object_t object = pmap->pm_pteobj; - int curgeneration; + struct rb_vm_page_scan_info info; #if defined(DIAGNOSTIC) if (object->ref_count != 1) panic("pmap_release: pteobj reference count != 1"); #endif - ptdpg = NULL; -retry: + info.pmap = pmap; + info.object = object; crit_enter(); TAILQ_REMOVE(&pmap_list, pmap, pm_pmnode); - curgeneration = object->generation; - for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) { - n = TAILQ_NEXT(p, listq); - if (p->pindex == PTDPTDI) { - ptdpg = p; - continue; - } - if (!pmap_release_free_page(pmap, p)) { - crit_exit(); - goto retry; - } - if (object->generation != curgeneration) { - crit_exit(); - goto retry; + crit_exit(); + + do { + crit_enter(); + info.error = 0; + info.mpte = NULL; + info.limit = object->generation; + + vm_page_rb_tree_RB_SCAN(&object->rb_memq, NULL, + pmap_release_callback, &info); + if (info.error == 0 && info.mpte) { + if (!pmap_release_free_page(pmap, info.mpte)) + info.error = 1; } - } - if (ptdpg && !pmap_release_free_page(pmap, ptdpg)) { crit_exit(); - goto retry; + } while (info.error); +} + +static int +pmap_release_callback(struct vm_page *p, void *data) +{ + struct rb_vm_page_scan_info *info = data; + + if (p->pindex == PTDPTDI) { + info->mpte = p; + return(0); } - crit_exit(); + if (!pmap_release_free_page(info->pmap, p)) { + info->error = 1; + return(-1); + } + if (info->object->generation != info->limit) { + info->error = 1; + return(-1); + } + return(0); } static int @@ -2168,15 +2184,15 @@ * This eliminates the blast of soft faults on process startup and * immediately after an mmap. */ +static int pmap_object_init_pt_callback(vm_page_t p, void *data); + void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot, vm_object_t object, vm_pindex_t pindex, vm_size_t size, int limit) { - vm_offset_t tmpidx; + struct rb_vm_page_scan_info info; int psize; - vm_page_t p, mpte; - int objpgs; /* * We can't preinit if read access isn't set or there is no pmap @@ -2191,73 +2207,6 @@ if (curproc == NULL || pmap != vmspace_pmap(curproc->p_vmspace)) return; -#if 0 - /* - * XXX you must be joking, entering PTE's into a user page table - * without any accounting? This could result in the page table - * being freed while it still contains mappings (free with PG_ZERO - * assumption leading to a non-zero page being marked PG_ZERO). - */ - /* - * This code maps large physical mmap regions into the - * processor address space. Note that some shortcuts - * are taken, but the code works. - */ - if (pseflag && - (object->type == OBJT_DEVICE) && - ((addr & (NBPDR - 1)) == 0) && - ((size & (NBPDR - 1)) == 0) ) { - int i; - vm_page_t m[1]; - unsigned int ptepindex; - int npdes; - vm_offset_t ptepa; - - if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)]) - return; - -retry: - p = vm_page_lookup(object, pindex); - if (p && vm_page_sleep_busy(p, FALSE, "init4p")) - goto retry; - - if (p == NULL) { - p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL); - if (p == NULL) - return; - m[0] = p; - - if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) { - vm_page_free(p); - return; - } - - p = vm_page_lookup(object, pindex); - vm_page_wakeup(p); - } - - ptepa = (vm_offset_t) VM_PAGE_TO_PHYS(p); - if (ptepa & (NBPDR - 1)) { - return; - } - - p->valid = VM_PAGE_BITS_ALL; - - pmap->pm_stats.resident_count += size >> PAGE_SHIFT; - npdes = size >> PDRSHIFT; - for (i = 0; i < npdes; i++) { - pmap->pm_pdir[ptepindex] = - (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_PS); - ptepa += NBPDR; - ptepindex += 1; - } - vm_page_flag_set(p, PG_MAPPED); - cpu_invltlb(); - smp_invltlb(); - return; - } -#endif - psize = i386_btop(size); if ((object->type != OBJT_VNODE) || @@ -2272,80 +2221,56 @@ psize = object->size - pindex; } + if (psize == 0) + return; /* - * If we are processing a major portion of the object, then scan the - * entire thing. + * Use a red-black scan to traverse the requested range and load + * any valid pages found into the pmap. * * We cannot safely scan the object's memq unless we are in a * critical section since interrupts can remove pages from objects. */ - crit_enter(); - mpte = NULL; - if (psize > (object->resident_page_count >> 2)) { - objpgs = psize; + info.start_pindex = pindex; + info.end_pindex = pindex + psize - 1; + info.limit = limit; + info.mpte = NULL; + info.addr = addr; + info.pmap = pmap; - for (p = TAILQ_FIRST(&object->memq); - objpgs > 0 && p != NULL; - p = TAILQ_NEXT(p, listq) - ) { - tmpidx = p->pindex; - if (tmpidx < pindex) - continue; - tmpidx -= pindex; - if (tmpidx >= psize) - continue; + crit_enter(); + vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp, + pmap_object_init_pt_callback, &info); + crit_exit(); +} - /* - * don't allow an madvise to blow away our really - * free pages allocating pv entries. - */ - if ((limit & MAP_PREFAULT_MADVISE) && - vmstats.v_free_count < vmstats.v_free_reserved) { - break; - } - if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && - (p->busy == 0) && - (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { - if ((p->queue - p->pc) == PQ_CACHE) - vm_page_deactivate(p); - vm_page_busy(p); - mpte = pmap_enter_quick(pmap, - addr + i386_ptob(tmpidx), p, mpte); - vm_page_flag_set(p, PG_MAPPED); - vm_page_wakeup(p); - } - objpgs -= 1; - } - } else { - /* - * else lookup the pages one-by-one. - */ - for (tmpidx = 0; tmpidx < psize; tmpidx += 1) { - /* - * don't allow an madvise to blow away our really - * free pages allocating pv entries. - */ - if ((limit & MAP_PREFAULT_MADVISE) && - vmstats.v_free_count < vmstats.v_free_reserved) { - break; - } - p = vm_page_lookup(object, tmpidx + pindex); - if (p && - ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && - (p->busy == 0) && - (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { - if ((p->queue - p->pc) == PQ_CACHE) - vm_page_deactivate(p); - vm_page_busy(p); - mpte = pmap_enter_quick(pmap, - addr + i386_ptob(tmpidx), p, mpte); - vm_page_flag_set(p, PG_MAPPED); - vm_page_wakeup(p); - } - } +static +int +pmap_object_init_pt_callback(vm_page_t p, void *data) +{ + struct rb_vm_page_scan_info *info = data; + vm_pindex_t rel_index; + /* + * don't allow an madvise to blow away our really + * free pages allocating pv entries. + */ + if ((info->limit & MAP_PREFAULT_MADVISE) && + vmstats.v_free_count < vmstats.v_free_reserved) { + return(-1); + } + if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && + (p->busy == 0) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { + if ((p->queue - p->pc) == PQ_CACHE) + vm_page_deactivate(p); + vm_page_busy(p); + rel_index = p->pindex - info->start_pindex; + info->mpte = pmap_enter_quick(info->pmap, + info->addr + i386_ptob(rel_index), + p, info->mpte); + vm_page_flag_set(p, PG_MAPPED); + vm_page_wakeup(p); } - crit_exit(); + return(0); } /* Index: vm/vm_object.c =================================================================== RCS file: /cvs/src/sys/vm/vm_object.c,v retrieving revision 1.26 diff -u -r1.26 vm_object.c --- vm/vm_object.c 11 Sep 2006 20:25:31 -0000 1.26 +++ vm/vm_object.c 20 Nov 2006 23:34:50 -0000 @@ -94,15 +94,9 @@ #define EASY_SCAN_FACTOR 8 -#define MSYNC_FLUSH_HARDSEQ 0x01 -#define MSYNC_FLUSH_SOFTSEQ 0x02 - -static int msync_flush_flags = MSYNC_FLUSH_HARDSEQ | MSYNC_FLUSH_SOFTSEQ; -SYSCTL_INT(_vm, OID_AUTO, msync_flush_flags, - CTLFLAG_RW, &msync_flush_flags, 0, ""); - -static void vm_object_qcollapse (vm_object_t object); -static int vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags); +static void vm_object_qcollapse(vm_object_t object); +static int vm_object_page_collect_flush(vm_object_t object, vm_page_t p, + int pagerflags); /* * Virtual memory objects maintain the actual data @@ -151,7 +145,7 @@ _vm_object_allocate(objtype_t type, vm_size_t size, vm_object_t object) { int incr; - TAILQ_INIT(&object->memq); + RB_INIT(&object->rb_memq); LIST_INIT(&object->shadow_head); object->type = type; @@ -387,11 +381,11 @@ * The object must be locked. * This routine may block. */ +static int vm_object_terminate_callback(vm_page_t p, void *data); + void vm_object_terminate(vm_object_t object) { - vm_page_t p; - /* * Make sure no one uses us. */ @@ -436,19 +430,8 @@ * remove them from the object. */ crit_enter(); - while ((p = TAILQ_FIRST(&object->memq)) != NULL) { - if (p->busy || (p->flags & PG_BUSY)) - panic("vm_object_terminate: freeing busy page %p", p); - if (p->wire_count == 0) { - vm_page_busy(p); - vm_page_free(p); - mycpu->gd_cnt.v_pfree++; - } else { - vm_page_busy(p); - vm_page_remove(p); - vm_page_wakeup(p); - } - } + vm_page_rb_tree_RB_SCAN(&object->rb_memq, NULL, + vm_object_terminate_callback, NULL); crit_exit(); /* @@ -474,6 +457,23 @@ zfree(obj_zone, object); } +static int +vm_object_terminate_callback(vm_page_t p, void *data __unused) +{ + if (p->busy || (p->flags & PG_BUSY)) + panic("vm_object_terminate: freeing busy page %p", p); + if (p->wire_count == 0) { + vm_page_busy(p); + vm_page_free(p); + mycpu->gd_cnt.v_pfree++; + } else { + vm_page_busy(p); + vm_page_remove(p); + vm_page_wakeup(p); + } + return(0); +} + /* * vm_object_page_clean * @@ -487,16 +487,16 @@ * * Odd semantics: if start == end, we clean everything. */ +static int vm_object_page_clean_pass1(struct vm_page *p, void *data); +static int vm_object_page_clean_pass2(struct vm_page *p, void *data); void vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, - int flags) + int flags) { - vm_page_t p, np; - vm_offset_t tstart, tend; - vm_pindex_t pi; + struct rb_vm_page_scan_info info; struct vnode *vp; - int clearobjflags; + int wholescan; int pagerflags; int curgeneration; @@ -504,199 +504,135 @@ (object->flags & OBJ_MIGHTBEDIRTY) == 0) return; - pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) ? VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK; + pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) ? + VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK; pagerflags |= (flags & OBJPC_INVAL) ? VM_PAGER_PUT_INVAL : 0; vp = object->handle; + /* + * Interlock other major object operations. This allows us to + * temporarily clear OBJ_WRITEABLE and OBJ_MIGHTBEDIRTY. + */ + crit_enter(); vm_object_set_flag(object, OBJ_CLEANING); /* * Handle 'entire object' case */ - tstart = start; + info.start_pindex = start; if (end == 0) { - tend = object->size; + info.end_pindex = object->size - 1; } else { - tend = end; + info.end_pindex = end - 1; } + wholescan = (start == 0 && info.end_pindex == object->size - 1); + info.limit = flags; + info.pagerflags = pagerflags; + info.object = object; /* - * If the caller is smart and only msync()s a range he knows is - * dirty, we may be able to avoid an object scan. This results in - * a phenominal improvement in performance. We cannot do this - * as a matter of course because the object may be huge - e.g. - * the size might be in the gigabytes or terrabytes. - */ - if (msync_flush_flags & MSYNC_FLUSH_HARDSEQ) { - vm_offset_t tscan; - int scanlimit; - int scanreset; - - scanreset = object->resident_page_count / EASY_SCAN_FACTOR; - if (scanreset < 16) - scanreset = 16; - pagerflags |= VM_PAGER_IGNORE_CLEANCHK; - - scanlimit = scanreset; - tscan = tstart; - - /* - * spl protection is required despite the obj generation - * tracking because we cannot safely call vm_page_test_dirty() - * or avoid page field tests against an interrupt unbusy/free - * race that might occur prior to the busy check in - * vm_object_page_collect_flush(). - */ - crit_enter(); - while (tscan < tend) { - curgeneration = object->generation; - p = vm_page_lookup(object, tscan); - if (p == NULL || p->valid == 0 || - (p->queue - p->pc) == PQ_CACHE) { - if (--scanlimit == 0) - break; - ++tscan; - continue; - } - vm_page_test_dirty(p); - if ((p->dirty & p->valid) == 0) { - if (--scanlimit == 0) - break; - ++tscan; - continue; - } - /* - * If we have been asked to skip nosync pages and - * this is a nosync page, we can't continue. - */ - if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) { - if (--scanlimit == 0) - break; - ++tscan; - continue; + * If cleaning the entire object do a pass to mark the pages read-only. + * If everything worked out ok, clear OBJ_WRITEABLE and + * OBJ_MIGHTBEDIRTY. + */ + if (wholescan) { + info.error = 0; + vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp, + vm_object_page_clean_pass1, &info); + if (info.error == 0) { + vm_object_clear_flag(object, + OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY); + if (object->type == OBJT_VNODE && + (vp = (struct vnode *)object->handle) != NULL) { + if (vp->v_flag & VOBJDIRTY) + vclrflags(vp, VOBJDIRTY); } - scanlimit = scanreset; - - /* - * This returns 0 if it was unable to busy the first - * page (i.e. had to sleep). - */ - tscan += vm_object_page_collect_flush(object, p, - curgeneration, pagerflags); } - crit_exit(); - - /* - * If everything was dirty and we flushed it successfully, - * and the requested range is not the entire object, we - * don't have to mess with CLEANCHK or MIGHTBEDIRTY and can - * return immediately. - */ - if (tscan >= tend && (tstart || tend < object->size)) { - vm_object_clear_flag(object, OBJ_CLEANING); - return; - } - pagerflags &= ~VM_PAGER_IGNORE_CLEANCHK; } /* - * Generally set CLEANCHK interlock and make the page read-only so - * we can then clear the object flags. - * - * However, if this is a nosync mmap then the object is likely to - * stay dirty so do not mess with the page and do not clear the - * object flags. - * - * spl protection is required because an interrupt can remove page - * from the object. - */ - clearobjflags = 1; + * Do a pass to clean all the dirty pages we find. + */ + do { + info.error = 0; + curgeneration = object->generation; + vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp, + vm_object_page_clean_pass2, &info); + } while (info.error || curgeneration != object->generation); - crit_enter(); - for (p = TAILQ_FIRST(&object->memq); p; p = TAILQ_NEXT(p, listq)) { - vm_page_flag_set(p, PG_CLEANCHK); - if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) - clearobjflags = 0; - else - vm_page_protect(p, VM_PROT_READ); - } + vm_object_clear_flag(object, OBJ_CLEANING); crit_exit(); +} - if (clearobjflags && (tstart == 0) && (tend == object->size)) { - struct vnode *vp; +static +int +vm_object_page_clean_pass1(struct vm_page *p, void *data) +{ + struct rb_vm_page_scan_info *info = data; + + vm_page_flag_set(p, PG_CLEANCHK); + if ((info->limit & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) + info->error = 1; + else + vm_page_protect(p, VM_PROT_READ); + return(0); +} - vm_object_clear_flag(object, OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY); - if (object->type == OBJT_VNODE && - (vp = (struct vnode *)object->handle) != NULL) { - if (vp->v_flag & VOBJDIRTY) - vclrflags(vp, VOBJDIRTY); - } - } +static +int +vm_object_page_clean_pass2(struct vm_page *p, void *data) +{ + struct rb_vm_page_scan_info *info = data; + int n; /* - * spl protection is required both to avoid an interrupt unbusy/free - * race against a vm_page_lookup(), and also to ensure that the - * memq is consistent. We do not want a busy page to be ripped out - * from under us. + * Do not mess with pages that were inserted after we started + * the cleaning pass. */ - crit_enter(); -rescan: - crit_exit(); - crit_enter(); - curgeneration = object->generation; - - for (p = TAILQ_FIRST(&object->memq); p; p = np) { - int n; + if ((p->flags & PG_CLEANCHK) == 0) + return(0); - np = TAILQ_NEXT(p, listq); - -again: - pi = p->pindex; - if (((p->flags & PG_CLEANCHK) == 0) || - (pi < tstart) || (pi >= tend) || - (p->valid == 0) || - ((p->queue - p->pc) == PQ_CACHE)) { - vm_page_flag_clear(p, PG_CLEANCHK); - continue; - } - - vm_page_test_dirty(p); - if ((p->dirty & p->valid) == 0) { - vm_page_flag_clear(p, PG_CLEANCHK); - continue; - } - - /* - * If we have been asked to skip nosync pages and this is a - * nosync page, skip it. Note that the object flags were - * not cleared in this case so we do not have to set them. - */ - if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) { - vm_page_flag_clear(p, PG_CLEANCHK); - continue; - } + /* + * Before wasting time traversing the pmaps, check for trivial + * cases where the page cannot be dirty. + */ + if (p->valid == 0 || (p->queue - p->pc) == PQ_CACHE) { + KKASSERT((p->dirty & p->valid) == 0); + return(0); + } - n = vm_object_page_collect_flush(object, p, - curgeneration, pagerflags); - if (n == 0) - goto rescan; - if (object->generation != curgeneration) - goto rescan; + /* + * Check whether the page is dirty or not. The page has been set + * to be read-only so the check will not race a user dirtying the + * page. + */ + vm_page_test_dirty(p); + if ((p->dirty & p->valid) == 0) { + vm_page_flag_clear(p, PG_CLEANCHK); + return(0); + } - /* - * Try to optimize the next page. If we can't we pick up - * our (random) scan where we left off. - */ - if (msync_flush_flags & MSYNC_FLUSH_SOFTSEQ) { - if ((p = vm_page_lookup(object, pi + n)) != NULL) - goto again; - } + /* + * If we have been asked to skip nosync pages and this is a + * nosync page, skip it. Note that the object flags were + * not cleared in this case (because pass1 will have returned an + * error), so we do not have to set them. + */ + if ((info->limit & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) { + vm_page_flag_clear(p, PG_CLEANCHK); + return(0); } - crit_exit(); - vm_object_clear_flag(object, OBJ_CLEANING); - return; + /* + * Flush as many pages as we can. PG_CLEANCHK will be cleared on + * the pages that get successfully flushed. Set info->error if + * we raced an object modification. + */ + n = vm_object_page_collect_flush(info->object, p, info->pagerflags); + if (n == 0) + info->error = 1; + return(0); } /* @@ -711,24 +647,28 @@ * recode this to explicitly busy the pages. */ static int -vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags) +vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags) { int runlen; int maxf; int chkb; int maxb; int i; + int curgeneration; vm_pindex_t pi; vm_page_t maf[vm_pageout_page_count]; vm_page_t mab[vm_pageout_page_count]; vm_page_t ma[vm_pageout_page_count]; + curgeneration = object->generation; + pi = p->pindex; while (vm_page_sleep_busy(p, TRUE, "vpcwai")) { if (object->generation != curgeneration) { return(0); } } + KKASSERT(p->object == object && p->pindex == pi); maxf = 0; for(i = 1; i < vm_pageout_page_count; i++) { @@ -827,18 +767,24 @@ * * The object must be locked. */ +static int vm_object_deactivate_pages_callback(vm_page_t p, void *data); + static void vm_object_deactivate_pages(vm_object_t object) { - vm_page_t p, next; - crit_enter(); - for (p = TAILQ_FIRST(&object->memq); p != NULL; p = next) { - next = TAILQ_NEXT(p, listq); - vm_page_deactivate(p); - } + vm_page_rb_tree_RB_SCAN(&object->rb_memq, NULL, + vm_object_deactivate_pages_callback, NULL); crit_exit(); } + +static int +vm_object_deactivate_pages_callback(vm_page_t p, void *data __unused) +{ + vm_page_deactivate(p); + return(0); +} + #endif /* @@ -883,29 +829,31 @@ * * The object must *not* be locked. */ + +static int vm_object_pmap_remove_callback(vm_page_t p, void *data); + void vm_object_pmap_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end) { - vm_page_t p; + struct rb_vm_page_scan_info info; if (object == NULL) return; - - /* - * spl protection is required because an interrupt can unbusy/free - * a page. - */ + info.start_pindex = start; + info.end_pindex = end - 1; crit_enter(); - for (p = TAILQ_FIRST(&object->memq); - p != NULL; - p = TAILQ_NEXT(p, listq) - ) { - if (p->pindex >= start && p->pindex < end) - vm_page_protect(p, VM_PROT_NONE); - } - crit_exit(); - if ((start == 0) && (object->size == end)) + vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp, + vm_object_pmap_remove_callback, &info); + if (start == 0 && end == object->size) vm_object_clear_flag(object, OBJ_WRITEABLE); + crit_exit(); +} + +static int +vm_object_pmap_remove_callback(vm_page_t p, void *data __unused) +{ + vm_page_protect(p, VM_PROT_NONE); + return(0); } /* @@ -1121,13 +1069,13 @@ #define OBSC_COLLAPSE_NOWAIT 0x0002 #define OBSC_COLLAPSE_WAIT 0x0004 +static int vm_object_backing_scan_callback(vm_page_t p, void *data); + static __inline int vm_object_backing_scan(vm_object_t object, int op) { - int r = 1; - vm_page_t p; + struct rb_vm_page_scan_info info; vm_object_t backing_object; - vm_pindex_t backing_offset_index; /* * spl protection is required to avoid races between the memq/lookup, @@ -1137,7 +1085,7 @@ crit_enter(); backing_object = object->backing_object; - backing_offset_index = OFF_TO_IDX(object->backing_object_offset); + info.backing_offset_index = OFF_TO_IDX(object->backing_object_offset); /* * Initial conditions @@ -1164,159 +1112,176 @@ } /* - * Our scan + * Our scan. We have to retry if a negative error code is returned, + * otherwise 0 or 1 will be returned in info.error. 0 Indicates that + * the scan had to be stopped because the parent does not completely + * shadow the child. */ + info.object = object; + info.backing_object = backing_object; + info.limit = op; + do { + info.error = 1; + vm_page_rb_tree_RB_SCAN(&backing_object->rb_memq, NULL, + vm_object_backing_scan_callback, + &info); + } while (info.error < 0); + crit_exit(); + return(info.error); +} - p = TAILQ_FIRST(&backing_object->memq); - while (p) { - vm_page_t next = TAILQ_NEXT(p, listq); - vm_pindex_t new_pindex = p->pindex - backing_offset_index; - - if (op & OBSC_TEST_ALL_SHADOWED) { - vm_page_t pp; - - /* - * Ignore pages outside the parent object's range - * and outside the parent object's mapping of the - * backing object. - * - * note that we do not busy the backing object's - * page. - */ +static int +vm_object_backing_scan_callback(vm_page_t p, void *data) +{ + struct rb_vm_page_scan_info *info = data; + vm_object_t backing_object; + vm_object_t object; + vm_pindex_t new_pindex; + vm_pindex_t backing_offset_index; + int op; - if ( - p->pindex < backing_offset_index || - new_pindex >= object->size - ) { - p = next; - continue; - } + new_pindex = p->pindex - info->backing_offset_index; + op = info->limit; + object = info->object; + backing_object = info->backing_object; + backing_offset_index = info->backing_offset_index; - /* - * See if the parent has the page or if the parent's - * object pager has the page. If the parent has the - * page but the page is not valid, the parent's - * object pager must have the page. - * - * If this fails, the parent does not completely shadow - * the object and we might as well give up now. - */ + if (op & OBSC_TEST_ALL_SHADOWED) { + vm_page_t pp; - pp = vm_page_lookup(object, new_pindex); - if ( - (pp == NULL || pp->valid == 0) && - !vm_pager_has_page(object, new_pindex, NULL, NULL) - ) { - r = 0; - break; - } + /* + * Ignore pages outside the parent object's range + * and outside the parent object's mapping of the + * backing object. + * + * note that we do not busy the backing object's + * page. + */ + if ( + p->pindex < backing_offset_index || + new_pindex >= object->size + ) { + return(0); } /* - * Check for busy page + * See if the parent has the page or if the parent's + * object pager has the page. If the parent has the + * page but the page is not valid, the parent's + * object pager must have the page. + * + * If this fails, the parent does not completely shadow + * the object and we might as well give up now. */ - if (op & (OBSC_COLLAPSE_WAIT | OBSC_COLLAPSE_NOWAIT)) { - vm_page_t pp; - - if (op & OBSC_COLLAPSE_NOWAIT) { - if ( - (p->flags & PG_BUSY) || - !p->valid || - p->hold_count || - p->wire_count || - p->busy - ) { - p = next; - continue; - } - } else if (op & OBSC_COLLAPSE_WAIT) { - if (vm_page_sleep_busy(p, TRUE, "vmocol")) { - /* - * If we slept, anything could have - * happened. Since the object is - * marked dead, the backing offset - * should not have changed so we - * just restart our scan. - */ - p = TAILQ_FIRST(&backing_object->memq); - continue; - } - } - - /* - * Busy the page - */ - vm_page_busy(p); + pp = vm_page_lookup(object, new_pindex); + if ( + (pp == NULL || pp->valid == 0) && + !vm_pager_has_page(object, new_pindex, NULL, NULL) + ) { + info->error = 0; /* problemo */ + return(-1); /* stop the scan */ + } + } - KASSERT( - p->object == backing_object, - ("vm_object_qcollapse(): object mismatch") - ); + /* + * Check for busy page + */ - /* - * Destroy any associated swap - */ - if (backing_object->type == OBJT_SWAP) { - swap_pager_freespace( - backing_object, - p->pindex, - 1 - ); - } + if (op & (OBSC_COLLAPSE_WAIT | OBSC_COLLAPSE_NOWAIT)) { + vm_page_t pp; + if (op & OBSC_COLLAPSE_NOWAIT) { if ( - p->pindex < backing_offset_index || - new_pindex >= object->size + (p->flags & PG_BUSY) || + !p->valid || + p->hold_count || + p->wire_count || + p->busy ) { - /* - * Page is out of the parent object's range, we - * can simply destroy it. - */ - vm_page_protect(p, VM_PROT_NONE); - vm_page_free(p); - p = next; - continue; + return(0); } - - pp = vm_page_lookup(object, new_pindex); - if ( - pp != NULL || - vm_pager_has_page(object, new_pindex, NULL, NULL) - ) { + } else if (op & OBSC_COLLAPSE_WAIT) { + if (vm_page_sleep_busy(p, TRUE, "vmocol")) { /* - * page already exists in parent OR swap exists - * for this location in the parent. Destroy - * the original page from the backing object. + * If we slept, anything could have + * happened. Ask that the scan be restarted. * - * Leave the parent's page alone + * Since the object is marked dead, the + * backing offset should not have changed. */ - vm_page_protect(p, VM_PROT_NONE); - vm_page_free(p); - p = next; - continue; + info->error = -1; + return(-1); } + } + + /* + * Busy the page + */ + vm_page_busy(p); + KASSERT( + p->object == backing_object, + ("vm_object_qcollapse(): object mismatch") + ); + + /* + * Destroy any associated swap + */ + if (backing_object->type == OBJT_SWAP) { + swap_pager_freespace( + backing_object, + p->pindex, + 1 + ); + } + + if ( + p->pindex < backing_offset_index || + new_pindex >= object->size + ) { /* - * Page does not exist in parent, rename the - * page from the backing object to the main object. - * - * If the page was mapped to a process, it can remain - * mapped through the rename. + * Page is out of the parent object's range, we + * can simply destroy it. */ - if ((p->queue - p->pc) == PQ_CACHE) - vm_page_deactivate(p); + vm_page_protect(p, VM_PROT_NONE); + vm_page_free(p); + return(0); + } - vm_page_rename(p, object, new_pindex); - /* page automatically made dirty by rename */ + pp = vm_page_lookup(object, new_pindex); + if ( + pp != NULL || + vm_pager_has_page(object, new_pindex, NULL, NULL) + ) { + /* + * page already exists in parent OR swap exists + * for this location in the parent. Destroy + * the original page from the backing object. + * + * Leave the parent's page alone + */ + vm_page_protect(p, VM_PROT_NONE); + vm_page_free(p); + return(0); } - p = next; + + /* + * Page does not exist in parent, rename the + * page from the backing object to the main object. + * + * If the page was mapped to a process, it can remain + * mapped through the rename. + */ + if ((p->queue - p->pc) == PQ_CACHE) + vm_page_deactivate(p); + + vm_page_rename(p, object, new_pindex); + /* page automatically made dirty by rename */ } - crit_exit(); - return(r); + return(0); } - /* * this version of collapse allows the operation to occur earlier and * when paging_in_progress is true for an object... This is not a complete @@ -1465,7 +1430,7 @@ */ KASSERT(backing_object->ref_count == 1, ("backing_object %p was somehow re-referenced during collapse!", backing_object)); - KASSERT(TAILQ_FIRST(&backing_object->memq) == NULL, ("backing_object %p somehow has left over pages during collapse!", backing_object)); + KASSERT(RB_EMPTY(&backing_object->rb_memq), ("backing_object %p somehow has left over pages during collapse!", backing_object)); crit_enter(); TAILQ_REMOVE( &vm_object_list, @@ -1536,105 +1501,102 @@ * Removes all physical pages in the specified * object range from the object's list of pages. */ +static int vm_object_page_remove_callback(vm_page_t p, void *data); + void vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end, - boolean_t clean_only) + boolean_t clean_only) { - vm_page_t p, next; - unsigned int size; + struct rb_vm_page_scan_info info; int all; - if (object == NULL || object->resident_page_count == 0) - return; - - all = ((end == 0) && (start == 0)); - /* - * Since physically-backed objects do not use managed pages, we can't - * remove pages from the object (we must instead remove the page - * references, and then destroy the object). + * Degenerate cases and assertions */ + if (object == NULL || object->resident_page_count == 0) + return; KASSERT(object->type != OBJT_PHYS, ("attempt to remove pages from a physical object")); /* - * Indicating that the object is undergoing paging. - * - * spl protection is required to avoid a race between the memq scan, - * an interrupt unbusy/free, and the busy check. + * Indicate that paging is occuring on the object */ - vm_object_pip_add(object, 1); crit_enter(); -again: - size = end - start; - if (all || size > object->resident_page_count / 4) { - for (p = TAILQ_FIRST(&object->memq); p != NULL; p = next) { - next = TAILQ_NEXT(p, listq); - if (all || ((start <= p->pindex) && (p->pindex < end))) { - if (p->wire_count != 0) { - vm_page_protect(p, VM_PROT_NONE); - if (!clean_only) - p->valid = 0; - continue; - } + vm_object_pip_add(object, 1); - /* - * The busy flags are only cleared at - * interrupt -- minimize the spl transitions - */ + /* + * Figure out the actual removal range and whether we are removing + * the entire contents of the object or not. If removing the entire + * contents, be sure to get all pages, even those that might be + * beyond the end of the object. + */ + info.start_pindex = start; + if (end == 0) + info.end_pindex = (vm_pindex_t)-1; + else + info.end_pindex = end - 1; + info.limit = clean_only; + all = (start == 0 && info.end_pindex >= object->size - 1); - if (vm_page_sleep_busy(p, TRUE, "vmopar")) - goto again; + /* + * Loop until we are sure we have gotten them all. + */ + do { + info.error = 0; + vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp, + vm_object_page_remove_callback, &info); + } while (info.error); - if (clean_only && p->valid) { - vm_page_test_dirty(p); - if (p->valid & p->dirty) - continue; - } + /* + * Cleanup + */ + vm_object_pip_wakeup(object); + crit_exit(); +} - vm_page_busy(p); - vm_page_protect(p, VM_PROT_NONE); - vm_page_free(p); - } - } - } else { - while (size > 0) { - if ((p = vm_page_lookup(object, start)) != 0) { - if (p->wire_count != 0) { - vm_page_protect(p, VM_PROT_NONE); - if (!clean_only) - p->valid = 0; - start += 1; - size -= 1; - continue; - } +static int +vm_object_page_remove_callback(vm_page_t p, void *data) +{ + struct rb_vm_page_scan_info *info = data; - /* - * The busy flags are only cleared at - * interrupt -- minimize the spl transitions - */ - if (vm_page_sleep_busy(p, TRUE, "vmopar")) - goto again; + /* + * Wired pages cannot be destroyed, but they can be invalidated + * and we do so if clean_only (limit) is not set. + */ + if (p->wire_count != 0) { + vm_page_protect(p, VM_PROT_NONE); + if (info->limit == 0) + p->valid = 0; + return(0); + } - if (clean_only && p->valid) { - vm_page_test_dirty(p); - if (p->valid & p->dirty) { - start += 1; - size -= 1; - continue; - } - } + /* + * The busy flags are only cleared at + * interrupt -- minimize the spl transitions + */ - vm_page_busy(p); - vm_page_protect(p, VM_PROT_NONE); - vm_page_free(p); - } - start += 1; - size -= 1; - } + if (vm_page_sleep_busy(p, TRUE, "vmopar")) { + info->error = 1; + return(0); } - crit_exit(); - vm_object_pip_wakeup(object); + + /* + * limit is our clean_only flag. If set and the page is dirty, do + * not free it. + */ + if (info->limit && p->valid) { + vm_page_test_dirty(p); + if (p->valid & p->dirty) + return(0); + } + + /* + * Destroy the page + */ + vm_page_busy(p); + vm_page_protect(p, VM_PROT_NONE); + vm_page_free(p); + return(0); } /* @@ -1902,7 +1864,7 @@ db_indent += 2; count = 0; - for (p = TAILQ_FIRST(&object->memq); p != NULL; p = TAILQ_NEXT(p, listq)) { + RB_FOREACH(p, vm_page_rb_tree, &object->rb_memq) { if (count == 0) db_iprintf("memory:="); else if (count == 6) { Index: vm/vm_object.h =================================================================== RCS file: /cvs/src/sys/vm/vm_object.h,v retrieving revision 1.10 diff -u -r1.10 vm_object.h --- vm/vm_object.h 20 May 2006 02:42:15 -0000 1.10 +++ vm/vm_object.h 20 Nov 2006 21:13:31 -0000 @@ -87,6 +87,9 @@ #ifndef _VM_VM_H_ #include #endif +#ifndef _VM_VM_PAGE_H_ +#include +#endif #ifdef _KERNEL @@ -129,7 +132,7 @@ TAILQ_ENTRY(vm_object) object_list; /* list of all objects */ LIST_HEAD(, vm_object) shadow_head; /* objects that this is a shadow for */ LIST_ENTRY(vm_object) shadow_list; /* chain of shadow objects */ - TAILQ_HEAD(, vm_page) memq; /* list of resident pages */ + RB_HEAD(vm_page_rb_tree, vm_page) rb_memq; /* resident pages */ int generation; /* generation ID */ vm_size_t size; /* Object size */ int ref_count; /* How many refs?? */ Index: vm/vm_page.c =================================================================== RCS file: /cvs/src/sys/vm/vm_page.c,v retrieving revision 1.32 diff -u -r1.32 vm_page.c --- vm/vm_page.c 27 Jul 2005 07:55:15 -0000 1.32 +++ vm/vm_page.c 20 Nov 2006 23:36:58 -0000 @@ -94,14 +94,13 @@ static vm_page_t vm_page_select_cache(vm_object_t, vm_pindex_t); static vm_page_t _vm_page_list_find2(int basequeue, int index); -static int vm_page_bucket_count; /* How big is array? */ -static int vm_page_hash_mask; /* Mask for hash function */ -static struct vm_page **vm_page_buckets; /* Array of buckets */ -static volatile int vm_page_bucket_generation; struct vpgqueues vm_page_queues[PQ_COUNT]; /* Array of tailq lists */ #define ASSERT_IN_CRIT_SECTION() KKASSERT(crit_test(curthread)); +RB_GENERATE2(vm_page_rb_tree, vm_page, rb_entry, rb_vm_page_compare, + vm_pindex_t, pindex); + static void vm_page_queue_init(void) { @@ -198,7 +197,6 @@ vm_page_startup(vm_offset_t vaddr) { vm_offset_t mapped; - struct vm_page **bucket; vm_size_t npages; vm_paddr_t page_range; vm_paddr_t new_end; @@ -208,7 +206,6 @@ vm_paddr_t last_pa; vm_paddr_t end; vm_paddr_t biggestone, biggestsize; - vm_paddr_t total; total = 0; @@ -234,6 +231,7 @@ } end = phys_avail[biggestone+1]; + end = trunc_page(end); /* * Initialize the queue headers for the free queue, the active queue @@ -243,58 +241,13 @@ vm_page_queue_init(); /* - * Allocate (and initialize) the hash table buckets. - * - * The number of buckets MUST BE a power of 2, and the actual value is - * the next power of 2 greater than the number of physical pages in - * the system. - * - * We make the hash table approximately 2x the number of pages to - * reduce the chain length. This is about the same size using the - * singly-linked list as the 1x hash table we were using before - * using TAILQ but the chain length will be smaller. - * - * Note: This computation can be tweaked if desired. - */ - vm_page_buckets = (struct vm_page **)vaddr; - bucket = vm_page_buckets; - if (vm_page_bucket_count == 0) { - vm_page_bucket_count = 1; - while (vm_page_bucket_count < atop(total)) - vm_page_bucket_count <<= 1; - } - vm_page_bucket_count <<= 1; - vm_page_hash_mask = vm_page_bucket_count - 1; - - /* - * Cut a chunk out of the largest block of physical memory, - * moving its end point down to accomodate the hash table and - * vm_page_array. - */ - new_end = end - vm_page_bucket_count * sizeof(struct vm_page *); - new_end = trunc_page(new_end); - mapped = round_page(vaddr); - vaddr = pmap_map(mapped, new_end, end, - VM_PROT_READ | VM_PROT_WRITE); - vaddr = round_page(vaddr); - bzero((caddr_t) mapped, vaddr - mapped); - - for (i = 0; i < vm_page_bucket_count; i++) { - *bucket = NULL; - bucket++; - } - - /* * Compute the number of pages of memory that will be available for * use (taking into account the overhead of a page structure per * page). */ first_page = phys_avail[0] / PAGE_SIZE; page_range = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE - first_page; - npages = (total - (page_range * sizeof(struct vm_page)) - - (end - new_end)) / PAGE_SIZE; - - end = new_end; + npages = (total - (page_range * sizeof(struct vm_page))) / PAGE_SIZE; /* * Initialize the mem entry structures now, and put them in the free @@ -339,20 +292,29 @@ } /* - * Distributes the object/offset key pair among hash buckets. - * - * NOTE: This macro depends on vm_page_bucket_count being a power of 2. - * This routine may not block. - * - * We try to randomize the hash based on the object to spread the pages - * out in the hash table without it costing us too much. + * Scan comparison function for Red-Black tree scans. An inclusive + * (start,end) is expected. Other fields are not used. */ -static __inline int -vm_page_hash(vm_object_t object, vm_pindex_t pindex) +int +rb_vm_page_scancmp(struct vm_page *p, void *data) { - int i = ((uintptr_t)object + pindex) ^ object->hash_rand; + struct rb_vm_page_scan_info *info = data; + + if (p->pindex < info->start_pindex) + return(-1); + if (p->pindex > info->end_pindex) + return(1); + return(0); +} - return(i & vm_page_hash_mask); +int +rb_vm_page_compare(struct vm_page *p1, struct vm_page *p2) +{ + if (p1->pindex < p2->pindex) + return(-1); + if (p1->pindex > p2->pindex) + return(1); + return(0); } /* @@ -387,8 +349,6 @@ void vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex) { - struct vm_page **bucket; - ASSERT_IN_CRIT_SECTION(); if (m->object != NULL) panic("vm_page_insert: already inserted"); @@ -400,17 +360,9 @@ m->pindex = pindex; /* - * Insert it into the object_object/offset hash table + * Insert it into the object. */ - bucket = &vm_page_buckets[vm_page_hash(object, pindex)]; - m->hnext = *bucket; - *bucket = m; - vm_page_bucket_generation++; - - /* - * Now link into the object's list of backed pages. - */ - TAILQ_INSERT_TAIL(&object->memq, m, listq); + vm_page_rb_tree_RB_INSERT(&object->rb_memq, m); object->generation++; /* @@ -443,7 +395,6 @@ vm_page_remove(vm_page_t m) { vm_object_t object; - struct vm_page **bucket; crit_enter(); if (m->object == NULL) { @@ -457,34 +408,13 @@ object = m->object; /* - * Remove from the object_object/offset hash table. The object - * must be on the hash queue, we will panic if it isn't - * - * Note: we must NULL-out m->hnext to prevent loops in detached - * buffers with vm_page_lookup(). - */ - bucket = &vm_page_buckets[vm_page_hash(m->object, m->pindex)]; - while (*bucket != m) { - if (*bucket == NULL) - panic("vm_page_remove(): page not found in hash"); - bucket = &(*bucket)->hnext; - } - *bucket = m->hnext; - m->hnext = NULL; - vm_page_bucket_generation++; - - /* - * Now remove from the object's list of backed pages. - */ - TAILQ_REMOVE(&object->memq, m, listq); - - /* - * And show that the object has one fewer resident page. + * Remove the page from the object and update the object. */ + vm_page_rb_tree_RB_REMOVE(&object->rb_memq, m); object->resident_page_count--; object->generation++; - m->object = NULL; + crit_exit(); } @@ -507,25 +437,15 @@ vm_page_lookup(vm_object_t object, vm_pindex_t pindex) { vm_page_t m; - struct vm_page **bucket; - int generation; /* * Search the hash table for this object/offset pair */ -retry: - generation = vm_page_bucket_generation; - bucket = &vm_page_buckets[vm_page_hash(object, pindex)]; - for (m = *bucket; m != NULL; m = m->hnext) { - if ((m->object == object) && (m->pindex == pindex)) { - if (vm_page_bucket_generation != generation) - goto retry; - return (m); - } - } - if (vm_page_bucket_generation != generation) - goto retry; - return (NULL); + crit_enter(); + m = vm_page_rb_tree_RB_LOOKUP(&object->rb_memq, pindex); + crit_exit(); + KKASSERT(m == NULL || (m->object == object && m->pindex == pindex)); + return(m); } /* Index: vm/vm_page.h =================================================================== RCS file: /cvs/src/sys/vm/vm_page.h,v retrieving revision 1.24 diff -u -r1.24 vm_page.h --- vm/vm_page.h 21 May 2006 03:43:47 -0000 1.24 +++ vm/vm_page.h 20 Nov 2006 23:55:13 -0000 @@ -79,6 +79,9 @@ #ifndef _SYS_TYPES_H_ #include #endif +#ifndef _SYS_TREE_H_ +#include +#endif #ifndef _MACHINE_PMAP_H_ #include #endif @@ -134,10 +137,14 @@ struct msf_buf; struct vm_object; +int rb_vm_page_compare(struct vm_page *, struct vm_page *); + +struct vm_page_rb_tree; +RB_PROTOTYPE2(vm_page_rb_tree, vm_page, rb_entry, rb_vm_page_compare, vm_pindex_t); + struct vm_page { TAILQ_ENTRY(vm_page) pageq; /* vm_page_queues[] list (P) */ - struct vm_page *hnext; /* hash table link (O,P) */ - TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */ + RB_ENTRY(vm_page) rb_entry; /* Red-Black tree based at object */ struct vm_object *object; /* which object am I in (O,P)*/ vm_pindex_t pindex; /* offset into object (O,P) */ @@ -244,6 +251,29 @@ #define PQ_HOLD (3 + 2*PQ_MAXL2_SIZE) #define PQ_COUNT (4 + 2*PQ_MAXL2_SIZE) +/* + * Scan support + */ +struct vm_map; + +struct rb_vm_page_scan_info { + vm_pindex_t start_pindex; + vm_pindex_t end_pindex; + int limit; + int desired; + int error; + int pagerflags; + vm_offset_t addr; + vm_pindex_t backing_offset_index; + struct vm_object *object; + struct vm_object *backing_object; + struct vm_page *mpte; + struct pmap *pmap; + struct vm_map *map; +}; + +int rb_vm_page_scancmp(struct vm_page *, void *); + struct vpgqueues { struct pglist pl; int *cnt; Index: vm/vm_pageout.c =================================================================== RCS file: /cvs/src/sys/vm/vm_pageout.c,v retrieving revision 1.26 diff -u -r1.26 vm_pageout.c --- vm/vm_pageout.c 7 Nov 2006 17:51:24 -0000 1.26 +++ vm/vm_pageout.c 20 Nov 2006 23:59:08 -0000 @@ -471,12 +471,13 @@ * * The object and map must be locked. */ +static int vm_pageout_object_deactivate_pages_callback(vm_page_t, void *); + static void vm_pageout_object_deactivate_pages(vm_map_t map, vm_object_t object, vm_pindex_t desired, int map_remove_only) { - vm_page_t p, next; - int rcount; + struct rb_vm_page_scan_info info; int remove_mode; if (object->type == OBJT_DEVICE || object->type == OBJT_PHYS) @@ -498,64 +499,68 @@ * our busy check. */ crit_enter(); - rcount = object->resident_page_count; - p = TAILQ_FIRST(&object->memq); + info.limit = remove_mode; + info.map = map; + info.desired = desired; + vm_page_rb_tree_RB_SCAN(&object->rb_memq, NULL, + vm_pageout_object_deactivate_pages_callback, + &info + ); + crit_exit(); + object = object->backing_object; + } +} + +static int +vm_pageout_object_deactivate_pages_callback(vm_page_t p, void *data) +{ + struct rb_vm_page_scan_info *info = data; + int actcount; - while (p && (rcount-- > 0)) { - int actcount; - if (pmap_resident_count(vm_map_pmap(map)) <= desired) { - crit_exit(); - return; - } - next = TAILQ_NEXT(p, listq); - mycpu->gd_cnt.v_pdpages++; - if (p->wire_count != 0 || - p->hold_count != 0 || - p->busy != 0 || - (p->flags & (PG_BUSY|PG_UNMANAGED)) || - !pmap_page_exists_quick(vm_map_pmap(map), p)) { - p = next; - continue; - } + if (pmap_resident_count(vm_map_pmap(info->map)) <= info->desired) { + return(-1); + } + mycpu->gd_cnt.v_pdpages++; + if (p->wire_count != 0 || p->hold_count != 0 || p->busy != 0 || + (p->flags & (PG_BUSY|PG_UNMANAGED)) || + !pmap_page_exists_quick(vm_map_pmap(info->map), p)) { + return(0); + } - actcount = pmap_ts_referenced(p); - if (actcount) { - vm_page_flag_set(p, PG_REFERENCED); - } else if (p->flags & PG_REFERENCED) { - actcount = 1; - } + actcount = pmap_ts_referenced(p); + if (actcount) { + vm_page_flag_set(p, PG_REFERENCED); + } else if (p->flags & PG_REFERENCED) { + actcount = 1; + } - if ((p->queue != PQ_ACTIVE) && - (p->flags & PG_REFERENCED)) { - vm_page_activate(p); - p->act_count += actcount; - vm_page_flag_clear(p, PG_REFERENCED); - } else if (p->queue == PQ_ACTIVE) { - if ((p->flags & PG_REFERENCED) == 0) { - p->act_count -= min(p->act_count, ACT_DECLINE); - if (!remove_mode && (vm_pageout_algorithm || (p->act_count == 0))) { - vm_page_protect(p, VM_PROT_NONE); - vm_page_deactivate(p); - } else { - TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, p, pageq); - TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, p, pageq); - } - } else { - vm_page_activate(p); - vm_page_flag_clear(p, PG_REFERENCED); - if (p->act_count < (ACT_MAX - ACT_ADVANCE)) - p->act_count += ACT_ADVANCE; - TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, p, pageq); - TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, p, pageq); - } - } else if (p->queue == PQ_INACTIVE) { + if ((p->queue != PQ_ACTIVE) && + (p->flags & PG_REFERENCED)) { + vm_page_activate(p); + p->act_count += actcount; + vm_page_flag_clear(p, PG_REFERENCED); + } else if (p->queue == PQ_ACTIVE) { + if ((p->flags & PG_REFERENCED) == 0) { + p->act_count -= min(p->act_count, ACT_DECLINE); + if (!info->limit && (vm_pageout_algorithm || (p->act_count == 0))) { vm_page_protect(p, VM_PROT_NONE); + vm_page_deactivate(p); + } else { + TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, p, pageq); + TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, p, pageq); } - p = next; + } else { + vm_page_activate(p); + vm_page_flag_clear(p, PG_REFERENCED); + if (p->act_count < (ACT_MAX - ACT_ADVANCE)) + p->act_count += ACT_ADVANCE; + TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, p, pageq); + TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, p, pageq); } - crit_exit(); - object = object->backing_object; + } else if (p->queue == PQ_INACTIVE) { + vm_page_protect(p, VM_PROT_NONE); } + return(0); } /*