Index: conf/files.i386 =================================================================== RCS file: /cvs/src/sys/conf/files.i386,v retrieving revision 1.13 diff -u -r1.13 files.i386 --- conf/files.i386 14 Feb 2004 21:12:38 -0000 1.13 +++ conf/files.i386 15 Feb 2004 07:32:02 -0000 @@ -213,6 +213,7 @@ i386/i386/perfmon.c optional perfmon i386/i386/perfmon.c optional perfmon profiling-routine i386/i386/pmap.c standard +i386/i386/pmap_inval.c standard i386/i386/procfs_machdep.c standard i386/i386/spinlock.s standard i386/i386/support.s standard Index: i386/i386/bios.c =================================================================== RCS file: /cvs/src/sys/i386/i386/bios.c,v retrieving revision 1.8 diff -u -r1.8 bios.c --- i386/i386/bios.c 9 Nov 2003 02:22:35 -0000 1.8 +++ i386/i386/bios.c 15 Feb 2004 08:50:05 -0000 @@ -395,10 +395,11 @@ pte = PTmap; } /* - * install pointer to page 0. flush the tlb for safety. + * install pointer to page 0. Flush the tlb for safety. We don't + * migrate between cpus so a local flush is sufficient. */ *pte = (vm86pa - PAGE_SIZE) | PG_RW | PG_V; - invltlb(); + cpu_invltlb(); stack_top = stack; __va_start(ap, fmt); @@ -458,7 +459,7 @@ /* * XXX only needs to be invlpg(0) but that doesn't work on the 386 */ - invltlb(); + cpu_invltlb(); return (i); } Index: i386/i386/db_interface.c =================================================================== RCS file: /cvs/src/sys/i386/i386/db_interface.c,v retrieving revision 1.8 diff -u -r1.8 db_interface.c --- i386/i386/db_interface.c 28 Jan 2004 03:52:28 -0000 1.8 +++ i386/i386/db_interface.c 15 Feb 2004 08:52:18 -0000 @@ -278,7 +278,7 @@ } } - invltlb(); + cpu_invltlb(); } dst = (char *)addr; @@ -294,7 +294,7 @@ if (ptep1) *ptep1 = oldmap1; - invltlb(); + cpu_invltlb(); } } Index: i386/i386/machdep.c =================================================================== RCS file: /cvs/src/sys/i386/i386/machdep.c,v retrieving revision 1.53 diff -u -r1.53 machdep.c --- i386/i386/machdep.c 14 Feb 2004 19:58:50 -0000 1.53 +++ i386/i386/machdep.c 15 Feb 2004 08:49:05 -0000 @@ -1665,7 +1665,7 @@ * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; - invltlb(); + cpu_invltlb(); tmp = *(int *)ptr; /* @@ -1734,7 +1734,7 @@ } } *pte = 0; - invltlb(); + cpu_invltlb(); /* * XXX Index: i386/i386/mp_machdep.c =================================================================== RCS file: /cvs/src/sys/i386/i386/mp_machdep.c,v retrieving revision 1.21 diff -u -r1.21 mp_machdep.c --- i386/i386/mp_machdep.c 30 Jan 2004 05:42:16 -0000 1.21 +++ i386/i386/mp_machdep.c 15 Feb 2004 18:53:48 -0000 @@ -534,13 +534,13 @@ /* turn on 4MB of V == P addressing so we can get to MP table */ *(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME); - invltlb(); + cpu_invltlb(); /* examine the MP table for needed info, uses physical addresses */ x = mptable_pass2(); *(int *)PTD = 0; - invltlb(); + cpu_invltlb(); /* can't process default configs till the CPU APIC is pmapped */ if (x) @@ -1998,7 +1998,7 @@ for (x = 0; x < NKPT; x++) PTD[x] = (pd_entry_t)(PG_V | PG_RW | ((kptbase + x * PAGE_SIZE) & PG_FRAME)); - invltlb(); + cpu_invltlb(); /* start each AP */ for (x = 1; x <= mp_naps; ++x) { @@ -2336,8 +2336,8 @@ return 1; } -int smp_active = 0; /* are the APs allowed to run? */ -SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RW, &smp_active, 0, ""); +int smp_active_mask = 1; /* which cpus have been initialized? */ +SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RD, &smp_active_mask, 0, ""); /* XXX maybe should be hw.ncpu */ static int smp_cpus = 1; /* how many cpu's running */ @@ -2440,7 +2440,7 @@ if (smp_cpus == ncpus) { invltlb_ok = 1; smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */ - smp_active = 1; /* historic */ + smp_active_mask = (1 << ncpus) - 1; } /* Index: i386/i386/pmap.c =================================================================== RCS file: /cvs/src/sys/i386/i386/pmap.c,v retrieving revision 1.29 diff -u -r1.29 pmap.c --- i386/i386/pmap.c 14 Feb 2004 20:34:27 -0000 1.29 +++ i386/i386/pmap.c 15 Feb 2004 08:56:51 -0000 @@ -105,6 +105,8 @@ #include #endif /* SMP || APIC_IO */ #include +#include +#include #define PMAP_KEEP_PDIRS #ifndef PMAP_SHPGPERPROC @@ -135,8 +137,6 @@ #define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) #define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) -#define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W)) -#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) /* * Given a map and a machine independent protection code, @@ -196,11 +196,12 @@ static void pmap_remove_all (vm_page_t m); static vm_page_t pmap_enter_quick (pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte); -static int pmap_remove_pte (struct pmap *pmap, unsigned *ptq, - vm_offset_t sva); -static void pmap_remove_page (struct pmap *pmap, vm_offset_t va); +static int pmap_remove_pte (struct pmap *pmap, unsigned *ptq, + vm_offset_t sva, pmap_inval_info_t info); +static void pmap_remove_page (struct pmap *pmap, + vm_offset_t va, pmap_inval_info_t info); static int pmap_remove_entry (struct pmap *pmap, vm_page_t m, - vm_offset_t va); + vm_offset_t va, pmap_inval_info_t info); static boolean_t pmap_testbit (vm_page_t m, int bit); static void pmap_insert_entry (pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m); @@ -211,7 +212,7 @@ static vm_page_t _pmap_allocpte (pmap_t pmap, unsigned ptepindex); static unsigned * pmap_pte_quick (pmap_t pmap, vm_offset_t va); static vm_page_t pmap_page_lookup (vm_object_t object, vm_pindex_t pindex); -static int pmap_unuse_pt (pmap_t, vm_offset_t, vm_page_t); +static int pmap_unuse_pt (pmap_t, vm_offset_t, vm_page_t, pmap_inval_info_t); static vm_offset_t pmap_kmem_choose(vm_offset_t addr); static unsigned pdir4mb; @@ -463,7 +464,7 @@ gd->gd_CADDR3 = CPU_prvspace[0].CPAGE3; gd->gd_PADDR1 = (unsigned *)CPU_prvspace[0].PPAGE1; - invltlb(); + cpu_invltlb(); } #ifdef SMP @@ -590,47 +591,6 @@ return 0; } -static PMAP_INLINE void -invltlb_1pg(vm_offset_t va) -{ -#if defined(I386_CPU) - if (cpu_class == CPUCLASS_386) { - invltlb(); - } else -#endif - { - invlpg(va); - } -} - -static __inline void -pmap_TLB_invalidate(pmap_t pmap, vm_offset_t va) -{ -#if defined(SMP) - if (pmap->pm_active & (1 << mycpu->gd_cpuid)) - cpu_invlpg((void *)va); - if (pmap->pm_active & mycpu->gd_other_cpus) - smp_invltlb(); -#else - if (pmap->pm_active) - invltlb_1pg(va); -#endif -} - -static __inline void -pmap_TLB_invalidate_all(pmap_t pmap) -{ -#if defined(SMP) - if (pmap->pm_active & (1 << mycpu->gd_cpuid)) - cpu_invltlb(); - if (pmap->pm_active & mycpu->gd_other_cpus) - smp_invltlb(); -#else - if (pmap->pm_active) - invltlb(); -#endif -} - static unsigned * get_ptbase(pmap_t pmap) { @@ -647,12 +607,8 @@ if (frame != (((unsigned) APTDpde) & PG_FRAME)) { APTDpde = (pd_entry_t)(frame | PG_RW | PG_V); -#if defined(SMP) /* The page directory is not shared between CPUs */ cpu_invltlb(); -#else - invltlb(); -#endif } return (unsigned *) APTmap; } @@ -737,13 +693,15 @@ pmap_kenter(vm_offset_t va, vm_paddr_t pa) { unsigned *pte; - unsigned npte, opte; + unsigned npte; + pmap_inval_info info; + pmap_inval_init(&info); + pmap_inval_add(&info, kernel_pmap, va); npte = pa | PG_RW | PG_V | pgeflag; pte = (unsigned *)vtopte(va); - opte = *pte; *pte = npte; - invltlb_1pg(va); + pmap_inval_flush(&info); } /* @@ -753,10 +711,13 @@ pmap_kremove(vm_offset_t va) { unsigned *pte; + pmap_inval_info info; + pmap_inval_init(&info); + pmap_inval_add(&info, kernel_pmap, va); pte = (unsigned *)vtopte(va); *pte = 0; - invltlb_1pg(va); + pmap_inval_flush(&info); } /* @@ -798,16 +759,12 @@ pte = (unsigned *)vtopte(va); *pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag; -#ifdef SMP cpu_invlpg((void *)va); -#else - invltlb_1pg(va); -#endif va += PAGE_SIZE; m++; } #ifdef SMP - smp_invltlb(); + smp_invltlb(); /* XXX */ #endif } @@ -827,11 +784,7 @@ pte = (unsigned *)vtopte(va); *pte = 0; -#ifdef SMP cpu_invlpg((void *)va); -#else - invltlb_1pg(va); -#endif va += PAGE_SIZE; } #ifdef SMP @@ -965,8 +918,9 @@ * drops to zero, then it decrements the wire count. */ static int -_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) +_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, pmap_inval_info_t info) { + pmap_inval_flush(info); while (vm_page_sleep_busy(m, FALSE, "pmuwpt")) ; @@ -975,6 +929,7 @@ /* * unmap the page table page */ + pmap_inval_add(info, pmap, -1); pmap->pm_pdir[m->pindex] = 0; --pmap->pm_stats.resident_count; if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) == @@ -984,7 +939,6 @@ * take effect immediately. */ pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex); - pmap_TLB_invalidate(pmap, pteva); } if (pmap->pm_ptphint == m) @@ -995,7 +949,6 @@ */ --m->wire_count; if (m->wire_count == 0) { - vm_page_flash(m); vm_page_busy(m); vm_page_free_zero(m); @@ -1007,11 +960,11 @@ } static PMAP_INLINE int -pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) +pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, pmap_inval_info_t info) { vm_page_unhold(m); if (m->hold_count == 0) - return _pmap_unwire_pte_hold(pmap, m); + return _pmap_unwire_pte_hold(pmap, m, info); else return 0; } @@ -1021,7 +974,8 @@ * conditionally free the page, and manage the hold/wire counts. */ static int -pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) +pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte, + pmap_inval_info_t info) { unsigned ptepindex; if (va >= UPT_MIN_ADDRESS) @@ -1033,12 +987,13 @@ (pmap->pm_ptphint->pindex == ptepindex)) { mpte = pmap->pm_ptphint; } else { + pmap_inval_flush(info); mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex); pmap->pm_ptphint = mpte; } } - return pmap_unwire_pte_hold(pmap, mpte); + return pmap_unwire_pte_hold(pmap, mpte, info); } void @@ -1255,7 +1210,8 @@ if (ptepa & PG_PS) { pmap->pm_pdir[ptepindex] = 0; ptepa = 0; - invltlb(); + cpu_invltlb(); + smp_invltlb(); } /* @@ -1501,9 +1457,9 @@ * to the header. Otherwise we must search the list for * the entry. In either case we free the now unused entry. */ - static int -pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va) +pmap_remove_entry(struct pmap *pmap, vm_page_t m, + vm_offset_t va, pmap_inval_info_t info) { pv_entry_t pv; int rtval; @@ -1524,17 +1480,14 @@ rtval = 0; if (pv) { - - rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem); + rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem, info); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); m->md.pv_list_count--; if (TAILQ_FIRST(&m->md.pv_list) == NULL) vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); - TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); free_pv_entry(pv); } - splx(s); return rtval; } @@ -1566,20 +1519,23 @@ * pmap_remove_pte: do the things to unmap a page in a process */ static int -pmap_remove_pte(struct pmap *pmap, unsigned *ptq, vm_offset_t va) +pmap_remove_pte(struct pmap *pmap, unsigned *ptq, vm_offset_t va, + pmap_inval_info_t info) { unsigned oldpte; vm_page_t m; + pmap_inval_add(info, pmap, va); oldpte = loadandclear(ptq); if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; /* * Machines that don't support invlpg, also don't support - * PG_G. + * PG_G. XXX PG_G is disabled for SMP so don't worry about + * the SMP case. */ if (oldpte & PG_G) - invlpg(va); + cpu_invlpg(va); pmap->pm_stats.resident_count -= 1; if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(oldpte); @@ -1596,9 +1552,9 @@ } if (oldpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); - return pmap_remove_entry(pmap, m, va); + return pmap_remove_entry(pmap, m, va, info); } else { - return pmap_unuse_pt(pmap, va, NULL); + return pmap_unuse_pt(pmap, va, NULL, info); } return 0; @@ -1613,7 +1569,7 @@ * not kernel_pmap. */ static void -pmap_remove_page(struct pmap *pmap, vm_offset_t va) +pmap_remove_page(struct pmap *pmap, vm_offset_t va, pmap_inval_info_t info) { unsigned *ptq; @@ -1624,14 +1580,13 @@ if (*pmap_pde(pmap, va) != 0) { ptq = get_ptbase(pmap) + i386_btop(va); if (*ptq) { - (void) pmap_remove_pte(pmap, ptq, va); - pmap_TLB_invalidate(pmap, va); + pmap_remove_pte(pmap, ptq, va, info); } } } /* - * pmap_remopve: + * pmap_remove: * * Remove the given range of addresses from the specified map. * @@ -1648,7 +1603,7 @@ vm_offset_t pdnxt; vm_offset_t ptpaddr; vm_offset_t sindex, eindex; - int anyvalid; + struct pmap_inval_info info; if (pmap == NULL) return; @@ -1656,6 +1611,8 @@ if (pmap->pm_stats.resident_count == 0) return; + pmap_inval_init(&info); + /* * special handling of removing one page. a very * common operation and easy to short circuit some @@ -1663,12 +1620,11 @@ */ if (((sva + PAGE_SIZE) == eva) && (((unsigned) pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { - pmap_remove_page(pmap, sva); + pmap_remove_page(pmap, sva, &info); + pmap_inval_flush(&info); return; } - anyvalid = 0; - /* * Get a local virtual address for the mappings that are being * worked with. @@ -1690,9 +1646,9 @@ pdirindex = sindex / NPDEPG; if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) { + pmap_inval_add(&info, pmap, -1); pmap->pm_pdir[pdirindex] = 0; pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; - anyvalid++; continue; } @@ -1712,22 +1668,16 @@ pdnxt = eindex; } - for ( ;sindex != pdnxt; sindex++) { + for (; sindex != pdnxt; sindex++) { vm_offset_t va; - if (ptbase[sindex] == 0) { + if (ptbase[sindex] == 0) continue; - } va = i386_ptob(sindex); - - anyvalid++; - if (pmap_remove_pte(pmap, - ptbase + sindex, va)) + if (pmap_remove_pte(pmap, ptbase + sindex, va, &info)) break; } } - - if (anyvalid) - pmap_TLB_invalidate_all(pmap); + pmap_inval_flush(&info); } /* @@ -1742,8 +1692,9 @@ static void pmap_remove_all(vm_page_t m) { - pv_entry_t pv; + struct pmap_inval_info info; unsigned *pte, tpte; + pv_entry_t pv; int s; #if defined(PMAP_DIAGNOSTIC) @@ -1756,11 +1707,13 @@ } #endif + pmap_inval_init(&info); s = splvm(); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pv->pv_pmap->pm_stats.resident_count--; pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); + pmap_inval_add(&info, pv->pv_pmap, pv->pv_va); tpte = loadandclear(pte); if (tpte & PG_W) @@ -1783,18 +1736,16 @@ if (pmap_track_modified(pv->pv_va)) vm_page_dirty(m); } - pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va); - TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); m->md.pv_list_count--; - pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); + pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem, &info); free_pv_entry(pv); } vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); - splx(s); + pmap_inval_flush(&info); } /* @@ -1812,7 +1763,7 @@ unsigned *ptbase; vm_offset_t pdnxt, ptpaddr; vm_pindex_t sindex, eindex; - int anychanged; + pmap_inval_info info; if (pmap == NULL) return; @@ -1825,7 +1776,7 @@ if (prot & VM_PROT_WRITE) return; - anychanged = 0; + pmap_inval_init(&info); ptbase = get_ptbase(pmap); @@ -1840,9 +1791,9 @@ pdirindex = sindex / NPDEPG; if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) { + pmap_inval_add(&info, pmap, -1); (unsigned) pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW); pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; - anychanged++; continue; } @@ -1862,6 +1813,8 @@ unsigned pbits; vm_page_t m; + /* XXX this isn't optimal */ + pmap_inval_add(&info, pmap, i386_ptob(sindex)); pbits = ptbase[sindex]; if (pbits & PG_MANAGED) { @@ -1885,12 +1838,10 @@ if (pbits != ptbase[sindex]) { ptbase[sindex] = pbits; - anychanged = 1; } } } - if (anychanged) - pmap_TLB_invalidate_all(pmap); + pmap_inval_flush(&info); } /* @@ -1914,6 +1865,7 @@ vm_paddr_t opa; vm_offset_t origpte, newpte; vm_page_t mpte; + pmap_inval_info info; if (pmap == NULL) return; @@ -1934,26 +1886,8 @@ if (va < UPT_MIN_ADDRESS) { mpte = pmap_allocpte(pmap, va); } -#if 0 && defined(PMAP_DIAGNOSTIC) - else { - vm_offset_t *pdeaddr = (vm_offset_t *)pmap_pde(pmap, va); - if (((origpte = (vm_offset_t) *pdeaddr) & PG_V) == 0) { - panic("pmap_enter: invalid kernel page table page(0), pdir=%p, pde=%p, va=%p\n", - pmap->pm_pdir[PTDPTDI], origpte, va); - } - if (smp_active) { - pdeaddr = (vm_offset_t *) IdlePTDS[cpuid]; - if (((newpte = pdeaddr[va >> PDRSHIFT]) & PG_V) == 0) { - if ((vm_offset_t) my_idlePTD != (vm_offset_t) vtophys(pdeaddr)) - printf("pde mismatch: %x, %x\n", my_idlePTD, pdeaddr); - printf("cpuid: %d, pdeaddr: 0x%x\n", cpuid, pdeaddr); - panic("pmap_enter: invalid kernel page table page(1), pdir=%p, npde=%p, pde=%p, va=%p\n", - pmap->pm_pdir[PTDPTDI], newpte, origpte, va); - } - } - } -#endif + pmap_inval_init(&info); pte = pmap_pte(pmap, va); /* @@ -1965,6 +1899,7 @@ } pa = VM_PAGE_TO_PHYS(m) & PG_FRAME; + pmap_inval_add(&info, pmap, va); /* XXX non-optimal */ origpte = *(vm_offset_t *)pte; opa = origpte & PG_FRAME; @@ -2001,16 +1936,9 @@ mpte->hold_count--; if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) { - if ((origpte & PG_RW) == 0) { + if ((origpte & PG_RW) == 0) *pte |= PG_RW; -#ifdef SMP - cpu_invlpg((void *)va); - if (pmap->pm_active & mycpu->gd_other_cpus) - smp_invltlb(); -#else - invltlb_1pg(va); -#endif - } + pmap_inval_flush(&info); return; } @@ -2034,7 +1962,7 @@ */ if (opa) { int err; - err = pmap_remove_pte(pmap, pte, va); + err = pmap_remove_pte(pmap, pte, va, &info); if (err) panic("pmap_enter: pte vanished, va: 0x%x", va); } @@ -2076,16 +2004,8 @@ */ if ((origpte & ~(PG_M|PG_A)) != newpte) { *pte = newpte | PG_A; - /*if (origpte)*/ { -#ifdef SMP - cpu_invlpg((void *)va); - if (pmap->pm_active & mycpu->gd_other_cpus) - smp_invltlb(); -#else - invltlb_1pg(va); -#endif - } } + pmap_inval_flush(&info); } /* @@ -2104,6 +2024,9 @@ { unsigned *pte; vm_paddr_t pa; + pmap_inval_info info; + + pmap_inval_init(&info); /* * In the case that a page table page is not @@ -2160,7 +2083,7 @@ pte = (unsigned *)vtopte(va); if (*pte) { if (mpte) - pmap_unwire_pte_hold(pmap, mpte); + pmap_unwire_pte_hold(pmap, mpte, &info); return 0; } @@ -2273,7 +2196,8 @@ ptepindex += 1; } vm_page_flag_set(p, PG_MAPPED); - invltlb(); + cpu_invltlb(); + smp_invltlb(); return; } @@ -2360,7 +2284,6 @@ } } } - return; } /* @@ -2477,9 +2400,22 @@ /* * Wiring is not a hardware characteristic so there is no need to - * invalidate TLB. + * invalidate TLB. However, in an SMP environment we must use + * a locked bus cycle to update the pte (if we are not using + * the pmap_inval_*() API that is)... it's ok to do this for simple + * wiring changes. */ - pmap_pte_set_w(pte, wired); +#ifdef SMP + if (wired) + atomic_set_int(pte, PG_W); + else + atomic_clear_int(pte, PG_W); +#else + if (wired) + atomic_set_int_nonlocked(pte, PG_W); + else + atomic_clear_int_nonlocked(pte, PG_W); +#endif } @@ -2495,6 +2431,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { + pmap_inval_info info; vm_offset_t addr; vm_offset_t end_addr = src_addr + len; vm_offset_t pdnxt; @@ -2512,13 +2449,12 @@ dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME; if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) { APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V); -#if defined(SMP) /* The page directory is not shared between CPUs */ cpu_invltlb(); -#else - invltlb(); -#endif } + pmap_inval_init(&info); + pmap_inval_add(&info, dst_pmap, -1); + pmap_inval_add(&info, src_pmap, -1); for(addr = src_addr; addr < end_addr; addr = pdnxt) { unsigned *src_pte, *dst_pte; @@ -2588,7 +2524,7 @@ pmap_insert_entry(dst_pmap, addr, dstmpte, m); } else { - pmap_unwire_pte_hold(dst_pmap, dstmpte); + pmap_unwire_pte_hold(dst_pmap, dstmpte, &info); } if (dstmpte->hold_count >= srcmpte->hold_count) break; @@ -2598,6 +2534,7 @@ dst_pte++; } } + pmap_inval_flush(&info); } /* @@ -2803,6 +2740,7 @@ pv_entry_t pv, npv; int s; vm_page_t m; + pmap_inval_info info; #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace))) { @@ -2811,6 +2749,7 @@ } #endif + pmap_inval_init(&info); s = splvm(); for(pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; @@ -2826,6 +2765,7 @@ #else pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); #endif + pmap_inval_add(&info, pv->pv_pmap, pv->pv_va); tpte = *pte; /* @@ -2861,11 +2801,11 @@ vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); } - pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); + pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem, &info); free_pv_entry(pv); } + pmap_inval_flush(&info); splx(s); - pmap_TLB_invalidate_all(pmap); } /* @@ -2921,6 +2861,7 @@ static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem) { + struct pmap_inval_info info; pv_entry_t pv; unsigned *pte; int s; @@ -2928,6 +2869,7 @@ if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return; + pmap_inval_init(&info); s = splvm(); /* @@ -2950,11 +2892,21 @@ } #endif + /* + * Careful here. We can use a locked bus instruction to + * clear PG_A or PG_M safely but we need to synchronize + * with the target cpus when we mess with PG_RW. + */ pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); + if (bit == PG_RW) + pmap_inval_add(&info, pv->pv_pmap, pv->pv_va); if (setem) { - *(int *)pte |= bit; - pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va); +#ifdef SMP + atomic_set_int(pte, bit); +#else + atomic_set_int_nonlocked(pte, bit); +#endif } else { vm_offset_t pbits = *(vm_offset_t *)pte; if (pbits & bit) { @@ -2962,14 +2914,22 @@ if (pbits & PG_M) { vm_page_dirty(m); } - *(int *)pte = pbits & ~(PG_M|PG_RW); +#ifdef SMP + atomic_clear_int(pte, PG_M|PG_RW); +#else + atomic_clear_int_nonlocked(pte, PG_M|PG_RW); +#endif } else { - *(int *)pte = pbits & ~bit; +#ifdef SMP + atomic_clear_int(pte, bit); +#else + atomic_clear_int_nonlocked(pte, bit); +#endif } - pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va); } } } + pmap_inval_flush(&info); splx(s); } @@ -3038,10 +2998,11 @@ pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (pte && (*pte & PG_A)) { - *pte &= ~PG_A; - - pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va); - +#ifdef SMP + atomic_clear_int(pte, PG_A); +#else + atomic_clear_int_nonlocked(pte, PG_A); +#endif rtval++; if (rtval > 4) { break; @@ -3148,7 +3109,8 @@ tmpva += PAGE_SIZE; pa += PAGE_SIZE; } - invltlb(); + cpu_invltlb(); + smp_invltlb(); return ((void *)(va + offset)); } Index: i386/i386/pmap_inval.c =================================================================== RCS file: i386/i386/pmap_inval.c diff -N i386/i386/pmap_inval.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ i386/i386/pmap_inval.c 15 Feb 2004 07:35:26 -0000 @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2003 Matthew Dillon + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly$ + */ + +/* + * pmap invalidation support code. Certain hardware requirements must + * be dealt with when manipulating page table entries and page directory + * entries within a pmap. In particular, we cannot safely manipulate + * page tables which are in active use by another cpu (even if it is + * running in userland) for two reasons: First, TLB writebacks will + * race against our own modifications and tests. Second, even if we + * were to use bus-locked instruction we can still screw up the + * target cpu's instruction pipeline due to Intel cpu errata. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#if defined(SMP) || defined(APIC_IO) +#include +#include +#endif /* SMP || APIC_IO */ +#include +#include +#include + +#ifdef SMP + +static void +_cpu_invltlb(void *dummy) +{ + cpu_invltlb(); +} + +static void +_cpu_invl1pg(void *data) +{ + cpu_invlpg(data); +} + +#endif + +/* + * Initialize for add or flush + */ +void +pmap_inval_init(pmap_inval_info_t info) +{ + info->pir_flags = 0; +} + +/* + * Add a (pmap, va) pair to the invalidation list and protect access + * as appropriate. + */ +void +pmap_inval_add(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va) +{ +#ifdef SMP + if ((info->pir_flags & PIRF_CPUSYNC) == 0) { + info->pir_flags |= PIRF_CPUSYNC; + info->pir_cpusync.cs_run_func = NULL; + info->pir_cpusync.cs_fin1_func = NULL; + info->pir_cpusync.cs_fin2_func = NULL; + lwkt_cpusync_start(pmap->pm_active, &info->pir_cpusync); + } else if (pmap->pm_active & ~info->pir_cpusync.cs_mask) { + lwkt_cpusync_add(pmap->pm_active, &info->pir_cpusync); + } +#else + if (pmap->pm_active == 0) + return; +#endif + if ((info->pir_flags & (PIRF_INVLTLB|PIRF_INVL1PG)) == 0) { + if (va == (vm_offset_t)-1) { + info->pir_flags |= PIRF_INVLTLB; +#ifdef SMP + info->pir_cpusync.cs_fin2_func = _cpu_invltlb; +#endif + } else { + info->pir_flags |= PIRF_INVL1PG; + info->pir_cpusync.cs_data = (void *)va; +#ifdef SMP + info->pir_cpusync.cs_fin2_func = _cpu_invl1pg; +#endif + } + } else { + info->pir_flags |= PIRF_INVLTLB; +#ifdef SMP + info->pir_cpusync.cs_fin2_func = _cpu_invltlb; +#endif + } +} + +/* + * Synchronize changes with target cpus. + */ +void +pmap_inval_flush(pmap_inval_info_t info) +{ +#ifdef SMP + if (info->pir_flags & PIRF_CPUSYNC) + lwkt_cpusync_finish(&info->pir_cpusync); +#else + if (info->pir_flags & PIRF_INVLTLB) + cpu_invltlb(); + else if (info->pir_flags & PIRF_INVL1PG) + cpu_invlpg(info->pir_cpusync.cs_data); +#endif + info->pir_flags = 0; +} + Index: i386/i386/vm_machdep.c =================================================================== RCS file: /cvs/src/sys/i386/i386/vm_machdep.c,v retrieving revision 1.26 diff -u -r1.26 vm_machdep.c --- i386/i386/vm_machdep.c 20 Dec 2003 05:52:26 -0000 1.26 +++ i386/i386/vm_machdep.c 15 Feb 2004 18:54:44 -0000 @@ -417,16 +417,15 @@ cpu_reset() { #ifdef SMP - if (smp_active == 0) { + if (smp_active_mask == 1) { cpu_reset_real(); /* NOTREACHED */ } else { - u_int map; int cnt; printf("cpu_reset called on cpu#%d\n",mycpu->gd_cpuid); - map = mycpu->gd_other_cpus & ~ stopped_cpus; + map = mycpu->gd_other_cpus & ~stopped_cpus & smp_active_mask; if (map != 0) { printf("cpu_reset: Stopping other CPUs\n"); @@ -502,7 +501,7 @@ bzero((caddr_t) PTD, PAGE_SIZE); /* "good night, sweet prince .... " */ - invltlb(); + cpu_invltlb(); /* NOTREACHED */ while(1); } Index: i386/include/cpu.h =================================================================== RCS file: /cvs/src/sys/i386/include/cpu.h,v retrieving revision 1.13 diff -u -r1.13 cpu.h --- i386/include/cpu.h 30 Jan 2004 05:42:16 -0000 1.13 +++ i386/include/cpu.h 15 Feb 2004 18:37:06 -0000 @@ -76,6 +76,8 @@ atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_RESCHED) #define need_proftick() \ atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_OWEUPC) +#define need_ipiq() \ + atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_IPIQ) #define signotify() \ atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_SIGNAL) #define sigupcall() \ Index: i386/include/cpufunc.h =================================================================== RCS file: /cvs/src/sys/i386/include/cpufunc.h,v retrieving revision 1.7 diff -u -r1.7 cpufunc.h --- i386/include/cpufunc.h 26 Aug 2003 21:42:18 -0000 1.7 +++ i386/include/cpufunc.h 15 Feb 2004 08:58:49 -0000 @@ -313,35 +313,6 @@ #endif } -/* - * Invalidate a patricular VA on all cpus - */ -static __inline void -invlpg(u_int addr) -{ - __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); - smp_invltlb(); -} - -/* - * Invalidate the TLB on all cpus - */ -static __inline void -invltlb(void) -{ - u_int temp; - /* - * This should be implemented as load_cr3(rcr3()) when load_cr3() - * is inlined. - */ - __asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp) - : : "memory"); - smp_invltlb(); -#ifdef SWTCH_OPTIM_STATS - ++tlb_flush_count; -#endif -} - #endif /* _KERNEL */ static __inline u_short @@ -635,8 +606,6 @@ void insl (u_int port, void *addr, size_t cnt); void insw (u_int port, void *addr, size_t cnt); void invd (void); -void invlpg (u_int addr); -void invltlb (void); u_short inw (u_int port); u_int loadandclear (u_int *addr); void outb (u_int port, u_char data); Index: i386/include/pmap_inval.h =================================================================== RCS file: i386/include/pmap_inval.h diff -N i386/include/pmap_inval.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ i386/include/pmap_inval.h 15 Feb 2004 18:21:14 -0000 @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2003 Matthew Dillon + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly$ + */ + +#ifndef _MACHINE_PMAP_INVAL_H_ +#define _MACHINE_PMAP_INVAL_H_ + +typedef struct pmap_inval_info { + int pir_flags; + struct lwkt_cpusync pir_cpusync; +} pmap_inval_info; + +typedef pmap_inval_info *pmap_inval_info_t; + +#define PIRF_INVLTLB 0x0001 /* request invalidation of whole table */ +#define PIRF_INVL1PG 0x0002 /* else request invalidation of one page */ +#define PIRF_CPUSYNC 0x0004 /* cpusync is currently active */ + +#ifdef _KERNEL + +void pmap_inval_init(pmap_inval_info_t); +void pmap_inval_add(pmap_inval_info_t, pmap_t, vm_offset_t); +void pmap_inval_flush(pmap_inval_info_t); + +#endif + +#endif Index: i386/include/smp.h =================================================================== RCS file: /cvs/src/sys/i386/include/smp.h,v retrieving revision 1.8 diff -u -r1.8 smp.h --- i386/include/smp.h 3 Nov 2003 02:08:33 -0000 1.8 +++ i386/include/smp.h 15 Feb 2004 18:54:59 -0000 @@ -152,14 +152,14 @@ /* global data in init_smp.c */ extern int invltlb_ok; -extern int smp_active; +extern int smp_active_mask; extern int smp_started; extern volatile int smp_idle_loops; #endif /* !LOCORE */ #else /* !SMP && !APIC_IO */ -#define smp_active 0 /* smp_active always 0 on UP machines */ +#define smp_active_mask 1 /* smp_active_mask always 1 on UP machines */ #endif Index: kern/kern_shutdown.c =================================================================== RCS file: /cvs/src/sys/kern/kern_shutdown.c,v retrieving revision 1.15 diff -u -r1.15 kern_shutdown.c --- kern/kern_shutdown.c 8 Jan 2004 18:39:18 -0000 1.15 +++ kern/kern_shutdown.c 15 Feb 2004 18:55:32 -0000 @@ -69,7 +69,7 @@ #include #include #include -#include /* smp_active, cpuid */ +#include /* smp_active_mask, cpuid */ #include @@ -230,7 +230,7 @@ howto |= shutdown_howto; #ifdef SMP - if (smp_active) { + if (smp_active_mask > 1) { printf("boot() called on cpu#%d\n", mycpu->gd_cpuid); } #endif Index: kern/lwkt_ipiq.c =================================================================== RCS file: /cvs/src/sys/kern/lwkt_ipiq.c,v retrieving revision 1.2 diff -u -r1.2 lwkt_ipiq.c --- kern/lwkt_ipiq.c 15 Feb 2004 05:15:25 -0000 1.2 +++ kern/lwkt_ipiq.c 15 Feb 2004 18:57:53 -0000 @@ -82,8 +82,9 @@ #endif #ifdef SMP -static __int64_t ipiq_count = 0; -static __int64_t ipiq_fifofull = 0; +static __int64_t ipiq_count; +static __int64_t ipiq_fifofull; +static __int64_t ipiq_cscount; #endif #ifdef _KERNEL @@ -91,6 +92,7 @@ #ifdef SMP SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_count, CTLFLAG_RW, &ipiq_count, 0, ""); SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_fifofull, CTLFLAG_RW, &ipiq_fifofull, 0, ""); +SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_cscount, CTLFLAG_RW, &ipiq_cscount, 0, ""); #endif #endif @@ -254,8 +256,11 @@ } } if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) { - if (lwkt_process_ipiq1(&gd->gd_cpusyncq, NULL)) - goto again; + if (lwkt_process_ipiq1(&gd->gd_cpusyncq, NULL)) { + if (gd->gd_curthread->td_cscount == 0) + goto again; + need_ipiq(); + } } } @@ -278,8 +283,11 @@ } } if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) { - if (lwkt_process_ipiq1(&gd->gd_cpusyncq, &frame)) - goto again; + if (lwkt_process_ipiq1(&gd->gd_cpusyncq, &frame)) { + if (gd->gd_curthread->td_cscount == 0) + goto again; + need_ipiq(); + } } } #endif @@ -304,6 +312,27 @@ return(wi != ip->ip_windex); } +#else + +/* + * !SMP dummy routines + */ + +int +lwkt_send_ipiq(globaldata_t target, ipifunc_t func, void *arg) +{ + panic("lwkt_send_ipiq: UP box! (%d,%p,%p)", target->gd_cpuid, func, arg); + return(0); /* NOT REACHED */ +} + +void +lwkt_wait_ipiq(globaldata_t target, int seq) +{ + panic("lwkt_wait_ipiq: UP box! (%d,%d)", target->gd_cpuid, seq); +} + +#endif + /* * CPU Synchronization Support * @@ -375,50 +404,76 @@ void lwkt_cpusync_start(cpumask_t mask, lwkt_cpusync_t poll) { + globaldata_t gd = mycpu; + poll->cs_count = 0; poll->cs_mask = mask; - poll->cs_maxcount = lwkt_send_ipiq_mask(mask & mycpu->gd_other_cpus, - (ipifunc_t)lwkt_cpusync_remote1, poll); - if (mask & (1 << mycpu->gd_cpuid)) { +#ifdef SMP + poll->cs_maxcount = lwkt_send_ipiq_mask( + mask & gd->gd_other_cpus & smp_active_mask, + (ipifunc_t)lwkt_cpusync_remote1, poll); +#endif + if (mask & (1 << gd->gd_cpuid)) { if (poll->cs_run_func) poll->cs_run_func(poll); } - while (poll->cs_count != poll->cs_maxcount) { - crit_enter(); - lwkt_process_ipiq(); - crit_exit(); +#ifdef SMP + if (poll->cs_maxcount) { + ++ipiq_cscount; + ++gd->gd_curthread->td_cscount; + while (poll->cs_count != poll->cs_maxcount) { + crit_enter(); + lwkt_process_ipiq(); + crit_exit(); + } } +#endif } void lwkt_cpusync_add(cpumask_t mask, lwkt_cpusync_t poll) { + globaldata_t gd = mycpu; + int count; + mask &= ~poll->cs_mask; poll->cs_mask |= mask; - poll->cs_maxcount += lwkt_send_ipiq_mask(mask & mycpu->gd_other_cpus, - (ipifunc_t)lwkt_cpusync_remote1, poll); +#ifdef SMP + count = lwkt_send_ipiq_mask( + mask & gd->gd_other_cpus & smp_active_mask, + (ipifunc_t)lwkt_cpusync_remote1, poll); +#endif if (mask & (1 << mycpu->gd_cpuid)) { if (poll->cs_run_func) poll->cs_run_func(poll); } - while (poll->cs_count != poll->cs_maxcount) { - crit_enter(); - lwkt_process_ipiq(); - crit_exit(); +#ifdef SMP + poll->cs_maxcount += count; + if (poll->cs_maxcount) { + if (poll->cs_maxcount == count) + ++gd->gd_curthread->td_cscount; + while (poll->cs_count != poll->cs_maxcount) { + crit_enter(); + lwkt_process_ipiq(); + crit_exit(); + } } +#endif } /* * Finish synchronization with a set of target cpus. The target cpus will * execute cs_fin1_func(poll) prior to this function returning, and will * execute cs_fin2_func(data) IN TANDEM WITH THIS FUNCTION'S RETURN. + * + * If cs_maxcount is non-zero then we are mastering a cpusync with one or + * more remote cpus and must account for it in our thread structure. */ void lwkt_cpusync_finish(lwkt_cpusync_t poll) { - int count; + globaldata_t gd = mycpu; - count = -(poll->cs_maxcount + 1); poll->cs_count = -1; if (poll->cs_mask & (1 << mycpu->gd_cpuid)) { if (poll->cs_fin1_func) @@ -426,13 +481,20 @@ if (poll->cs_fin2_func) poll->cs_fin2_func(poll->cs_data); } - while (poll->cs_count != count) { - crit_enter(); - lwkt_process_ipiq(); - crit_exit(); +#ifdef SMP + if (poll->cs_maxcount) { + while (poll->cs_count != -(poll->cs_maxcount + 1)) { + crit_enter(); + lwkt_process_ipiq(); + crit_exit(); + } + --gd->gd_curthread->td_cscount; } +#endif } +#ifdef SMP + /* * helper IPI remote messaging function. * @@ -485,25 +547,6 @@ ip->ip_arg[wi] = poll; ++ip->ip_windex; } -} - -#else - -/* - * !SMP dummy routines - */ - -int -lwkt_send_ipiq(globaldata_t target, ipifunc_t func, void *arg) -{ - panic("lwkt_send_ipiq: UP box! (%d,%p,%p)", target->gd_cpuid, func, arg); - return(0); /* NOT REACHED */ -} - -void -lwkt_wait_ipiq(globaldata_t target, int seq) -{ - panic("lwkt_wait_ipiq: UP box! (%d,%d)", target->gd_cpuid, seq); } #endif Index: kern/lwkt_thread.c =================================================================== RCS file: /cvs/src/sys/kern/lwkt_thread.c,v retrieving revision 1.54 diff -u -r1.54 lwkt_thread.c --- kern/lwkt_thread.c 15 Feb 2004 02:14:41 -0000 1.54 +++ kern/lwkt_thread.c 15 Feb 2004 18:56:44 -0000 @@ -32,10 +32,11 @@ * to use a critical section to avoid problems. Foreign thread * scheduling is queued via (async) IPIs. * - * NOTE: on UP machines smp_active is defined to be 0. On SMP machines - * smp_active is 0 prior to SMP activation, then it is 1. The LWKT module - * uses smp_active to optimize UP builds and to avoid sending IPIs during - * early boot (primarily interrupt and network thread initialization). + * NOTE: on UP machines smp_active_mask is defined to be 1. On SMP machines + * smp_active_mask is 1 prior to SMP activation, then it is a mask of all + * available cpus. The LWKT module uses smp_active_mask to optimize UP + * builds and to avoid sending IPIs during early boot (primarily interrupt + * and network thread initialization). */ #ifdef _KERNEL @@ -88,6 +89,9 @@ #endif static int untimely_switch = 0; +#ifdef INVARIANTS +static int panic_on_cscount = 0; +#endif static __int64_t switch_count = 0; static __int64_t preempt_hit = 0; static __int64_t preempt_miss = 0; @@ -96,6 +100,9 @@ #ifdef _KERNEL SYSCTL_INT(_lwkt, OID_AUTO, untimely_switch, CTLFLAG_RW, &untimely_switch, 0, ""); +#ifdef INVARIANTS +SYSCTL_INT(_lwkt, OID_AUTO, panic_on_cscount, CTLFLAG_RW, &panic_on_cscount, 0, ""); +#endif SYSCTL_QUAD(_lwkt, OID_AUTO, switch_count, CTLFLAG_RW, &switch_count, 0, ""); SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_hit, CTLFLAG_RW, &preempt_hit, 0, ""); SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_miss, CTLFLAG_RW, &preempt_miss, 0, ""); @@ -263,7 +270,7 @@ td->td_pri = TDPRI_KERN_DAEMON + TDPRI_CRIT; lwkt_initport(&td->td_msgport, td); pmap_init_thread(td); - if (smp_active == 0 || gd == mycpu) { + if (smp_active_mask == 1 || gd == mycpu) { crit_enter(); TAILQ_INSERT_TAIL(&gd->gd_tdallq, td, td_allq); crit_exit(); @@ -412,6 +419,14 @@ * actual value of mp_lock is not stable). */ mpheld = MP_LOCK_HELD(); +#ifdef INVARIANTS + if (td->td_cscount) { + printf("Diagnostic: attempt to switch while mastering cpusync: %p\n", + td); + if (panic_on_cscount) + panic("switching while mastering cpusync"); + } +#endif #endif if ((ntd = td->td_preempted) != NULL) { /* @@ -796,7 +811,7 @@ TAILQ_REMOVE(&w->wa_waitq, td, td_threadq); --w->wa_count; td->td_wait = NULL; - if (smp_active == 0 || td->td_gd == mycpu) { + if (smp_active_mask == 1 || td->td_gd == mycpu) { _lwkt_enqueue(td); if (td->td_preemptable) { td->td_preemptable(td, TDPRI_CRIT*2); /* YYY +token */ @@ -817,7 +832,7 @@ * do not own the thread there might be a race but the * target cpu will deal with it. */ - if (smp_active == 0 || td->td_gd == mycpu) { + if (smp_active_mask == 1 || td->td_gd == mycpu) { _lwkt_enqueue(td); if (td->td_preemptable) { td->td_preemptable(td, TDPRI_CRIT); Index: sys/thread.h =================================================================== RCS file: /cvs/src/sys/sys/thread.h,v retrieving revision 1.45 diff -u -r1.45 thread.h --- sys/thread.h 15 Feb 2004 05:15:27 -0000 1.45 +++ sys/thread.h 15 Feb 2004 18:30:05 -0000 @@ -199,8 +199,10 @@ int td_nest_count; /* prevent splz nesting */ #ifdef SMP int td_mpcount; /* MP lock held (count) */ + int td_cscount; /* cpu synchronization master */ #else int td_unused001; + int td_unused002; #endif char td_comm[MAXCOMLEN+1]; /* typ 16+1 bytes */ struct thread *td_preempted; /* we preempted this thread */