Index: kern/kern_lock.c =================================================================== RCS file: /cvs/src/sys/kern/kern_lock.c,v retrieving revision 1.14 diff -u -r1.14 kern_lock.c --- kern/kern_lock.c 6 Jun 2005 15:02:27 -0000 1.14 +++ kern/kern_lock.c 16 Nov 2005 18:42:08 -0000 @@ -50,7 +50,9 @@ #include #include #include +#include #include +#include /* * 0: no warnings, 1: warnings, 2: panic @@ -106,6 +108,9 @@ } } +/* + * lock acquisition helper routine. Called with the lock's spinlock held. + */ static int acquire(struct lock *lkp, int extflags, int wanted) { @@ -120,29 +125,32 @@ return 0; } - crit_enter(); while ((lkp->lk_flags & wanted) != 0) { lkp->lk_flags |= LK_WAIT_NONZERO; lkp->lk_waitcount++; - /* note: serialization lock is held through tsleep */ + + /* + * Use the _quick version so the critical section is left + * intact, protecting the tsleep interlock. See + * tsleep_interlock() for a description of what is + * happening here. + */ + tsleep_interlock(lkp); + spin_unlock_quick(&lkp->lk_spinlock); error = tsleep(lkp, lkp->lk_prio, lkp->lk_wmesg, ((extflags & LK_TIMELOCK) ? lkp->lk_timo : 0)); + spin_lock_quick(&lkp->lk_spinlock); if (lkp->lk_waitcount == 1) { lkp->lk_flags &= ~LK_WAIT_NONZERO; lkp->lk_waitcount = 0; } else { lkp->lk_waitcount--; } - if (error) { - crit_exit(); + if (error) return error; - } - if (extflags & LK_SLEEPFAIL) { - crit_exit(); + if (extflags & LK_SLEEPFAIL) return ENOLCK; - } } - crit_exit(); return 0; } @@ -152,19 +160,21 @@ * Shared requests increment the shared count. Exclusive requests set the * LK_WANT_EXCL flag (preventing further shared locks), and wait for already * accepted shared locks and shared-to-exclusive upgrades to go away. + * + * A spinlock is held for most of the procedure. We must not do anything + * fancy while holding the spinlock. 
*/ int #ifndef DEBUG_LOCKS -lockmgr(struct lock *lkp, u_int flags, lwkt_tokref_t interlkp, +lockmgr(struct lock *lkp, u_int flags, struct spinlock *interlkp, struct thread *td) #else -debuglockmgr(struct lock *lkp, u_int flags, lwkt_tokref_t interlkp, +debuglockmgr(struct lock *lkp, u_int flags, struct spinlock *interlkp, struct thread *td, const char *name, const char *file, int line) #endif { int error; int extflags; - lwkt_tokref ilock; static int didpanic; error = 0; @@ -175,6 +185,8 @@ #ifndef DEBUG_LOCKS if (lockmgr_from_int == 2) { didpanic = 1; + if (flags & LK_INTERLOCK) + spin_unlock(interlkp); panic( "lockmgr %s from %p: called from interrupt", lkp->lk_wmesg, ((int **)&lkp)[-1]); @@ -187,6 +199,8 @@ #else if (lockmgr_from_int == 2) { didpanic = 1; + if (flags & LK_INTERLOCK) + spin_unlock(interlkp); panic( "lockmgr %s from %s:%d: called from interrupt", lkp->lk_wmesg, file, line); @@ -199,14 +213,13 @@ #endif } - lwkt_gettoken(&ilock, &lkp->lk_interlock); + spin_lock(&lkp->lk_spinlock); if (flags & LK_INTERLOCK) - lwkt_reltoken(interlkp); + spin_unlock(interlkp); extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK; switch (flags & LK_TYPE_MASK) { - case LK_SHARED: /* * If we are not the exclusive lock holder, we have to block @@ -247,8 +260,10 @@ /* fall into downgrade */ case LK_DOWNGRADE: - if (lkp->lk_lockholder != td || lkp->lk_exclusivecount == 0) + if (lkp->lk_lockholder != td || lkp->lk_exclusivecount == 0) { + spin_unlock(&lkp->lk_spinlock); panic("lockmgr: not holding exclusive lock"); + } sharelock(lkp, lkp->lk_exclusivecount); lkp->lk_exclusivecount = 0; lkp->lk_flags &= ~LK_HAVE_EXCL; @@ -280,8 +295,10 @@ * after the upgrade). If we return an error, the file * will always be unlocked. 
*/ - if ((lkp->lk_lockholder == td) || (lkp->lk_sharecount <= 0)) + if ((lkp->lk_lockholder == td) || (lkp->lk_sharecount <= 0)) { + spin_unlock(&lkp->lk_spinlock); panic("lockmgr: upgrade exclusive lock"); + } shareunlock(lkp, 1); COUNT(td, -1); /* @@ -307,8 +324,10 @@ break; lkp->lk_flags |= LK_HAVE_EXCL; lkp->lk_lockholder = td; - if (lkp->lk_exclusivecount != 0) + if (lkp->lk_exclusivecount != 0) { + spin_unlock(&lkp->lk_spinlock); panic("lockmgr: non-zero exclusive count"); + } lkp->lk_exclusivecount = 1; #if defined(DEBUG_LOCKS) lkp->lk_filename = file; @@ -333,8 +352,10 @@ /* * Recursive lock. */ - if ((extflags & (LK_NOWAIT | LK_CANRECURSE)) == 0) + if ((extflags & (LK_NOWAIT | LK_CANRECURSE)) == 0) { + spin_unlock(&lkp->lk_spinlock); panic("lockmgr: locking against myself"); + } if ((extflags & LK_CANRECURSE) != 0) { lkp->lk_exclusivecount++; COUNT(td, 1); @@ -365,8 +386,10 @@ break; lkp->lk_flags |= LK_HAVE_EXCL; lkp->lk_lockholder = td; - if (lkp->lk_exclusivecount != 0) + if (lkp->lk_exclusivecount != 0) { + spin_unlock(&lkp->lk_spinlock); panic("lockmgr: non-zero exclusive count"); + } lkp->lk_exclusivecount = 1; #if defined(DEBUG_LOCKS) lkp->lk_filename = file; @@ -380,6 +403,7 @@ if (lkp->lk_exclusivecount != 0) { if (lkp->lk_lockholder != td && lkp->lk_lockholder != LK_KERNTHREAD) { + spin_unlock(&lkp->lk_spinlock); panic("lockmgr: pid %d, not %s thr %p unlocking", (td->td_proc ? td->td_proc->p_pid : -99), "exclusive lock holder", @@ -410,8 +434,10 @@ * check for holding a shared lock, but at least we can * check for an exclusive one. 
*/ - if (lkp->lk_lockholder == td) + if (lkp->lk_lockholder == td) { + spin_unlock(&lkp->lk_spinlock); panic("lockmgr: draining against myself"); + } error = acquiredrain(lkp, extflags); if (error) @@ -428,7 +454,7 @@ break; default: - lwkt_reltoken(&ilock); + spin_unlock(&lkp->lk_spinlock); panic("lockmgr: unknown locktype request %d", flags & LK_TYPE_MASK); /* NOTREACHED */ @@ -439,10 +465,13 @@ lkp->lk_flags &= ~LK_WAITDRAIN; wakeup((void *)&lkp->lk_flags); } - lwkt_reltoken(&ilock); + spin_unlock(&lkp->lk_spinlock); return (error); } +/* + * lock acquisition helper routine. Called with the lock's spinlock held. + */ static int acquiredrain(struct lock *lkp, int extflags) { @@ -457,10 +486,18 @@ while (lkp->lk_flags & LK_ALL) { lkp->lk_flags |= LK_WAITDRAIN; - /* interlock serialization held through tsleep */ + /* + * Use the _quick version so the critical section is left + * intact, protecting the tsleep interlock. See + * tsleep_interlock() for a description of what is + * happening here. + */ + tsleep_interlock(&lkp->lk_flags); + spin_unlock_quick(&lkp->lk_spinlock); error = tsleep(&lkp->lk_flags, lkp->lk_prio, lkp->lk_wmesg, ((extflags & LK_TIMELOCK) ? 
lkp->lk_timo : 0)); + spin_lock_quick(&lkp->lk_spinlock); if (error) return error; if (extflags & LK_SLEEPFAIL) { @@ -476,7 +513,7 @@ void lockinit(struct lock *lkp, int prio, char *wmesg, int timo, int flags) { - lwkt_token_init(&lkp->lk_interlock); + spin_init(&lkp->lk_spinlock); lkp->lk_flags = (flags & LK_EXTFLG_MASK); lkp->lk_sharecount = 0; lkp->lk_waitcount = 0; @@ -508,10 +545,9 @@ int lockstatus(struct lock *lkp, struct thread *td) { - lwkt_tokref ilock; int lock_type = 0; - lwkt_gettoken(&ilock, &lkp->lk_interlock); + spin_lock(&lkp->lk_spinlock); if (lkp->lk_exclusivecount != 0) { if (td == NULL || lkp->lk_lockholder == td) lock_type = LK_EXCLUSIVE; @@ -520,7 +556,7 @@ } else if (lkp->lk_sharecount != 0) { lock_type = LK_SHARED; } - lwkt_reltoken(&ilock); + spin_unlock(&lkp->lk_spinlock); return (lock_type); } @@ -532,12 +568,11 @@ int lockcount(struct lock *lkp) { - lwkt_tokref ilock; int count; - lwkt_gettoken(&ilock, &lkp->lk_interlock); + spin_lock(&lkp->lk_spinlock); count = lkp->lk_exclusivecount + lkp->lk_sharecount; - lwkt_reltoken(&ilock); + spin_unlock(&lkp->lk_spinlock); return (count); } Index: kern/kern_synch.c =================================================================== RCS file: /cvs/src/sys/kern/kern_synch.c,v retrieving revision 1.53 diff -u -r1.53 kern_synch.c --- kern/kern_synch.c 14 Nov 2005 18:50:05 -0000 1.53 +++ kern/kern_synch.c 16 Nov 2005 18:23:14 -0000 @@ -469,6 +469,37 @@ } /* + * This is a dandy function that allows us to interlock tsleep/wakeup + * operations with unspecified upper level locks, such as lockmgr locks, + * simply by holding a critical section. The sequence is: + * + * (enter critical section) + * (acquire upper level lock) + * tsleep_interlock(blah) + * (release upper level lock) + * tsleep(blah, ...) 
+ * (exit critical section) + * + * Basically this function sets our cpumask for the ident which informs + * other cpus that our cpu 'might' be waiting (or about to wait) on the + * hash index related to the ident. The critical section prevents another + * cpu's wakeup() from being processed on our cpu until we are actually + * able to enter the tsleep(). Thus, no race occurs between our attempt + * to release a resource and sleep, and another cpu's attempt to acquire + * a resource and call wakeup. + * + * There isn't much of a point to this function unless you call it while + * holding a critical section. + */ +void +tsleep_interlock(void *ident) +{ + int id = LOOKUP(ident); + + atomic_set_int(&slpque_cpumasks[id], mycpu->gd_cpumask); +} + +/* * Implement the timeout for tsleep. * * We set P_BREAKTSLEEP to indicate that an event has occured, but Index: kern/vfs_bio.c =================================================================== RCS file: /cvs/src/sys/kern/vfs_bio.c,v retrieving revision 1.52 diff -u -r1.52 vfs_bio.c --- kern/vfs_bio.c 14 Nov 2005 19:14:05 -0000 1.52 +++ kern/vfs_bio.c 16 Nov 2005 18:43:25 -0000 @@ -105,7 +105,7 @@ vm_page_t bogus_page; int vmiodirenable = TRUE; int runningbufspace; -struct lwkt_token buftimetoken; /* Interlock on setting prio and timo */ +struct spinlock buftimespinlock; /* Interlock on setting prio and timo */ static int bufspace, maxbufspace, bufmallocspace, maxbufmallocspace, lobufspace, hibufspace; @@ -436,7 +436,7 @@ int i; LIST_INIT(&invalhash); - lwkt_token_init(&buftimetoken); + spin_init(&buftimespinlock); for (i = 0; i <= bufhashmask; i++) LIST_INIT(&bufhashtbl[i]); Index: sys/buf.h =================================================================== RCS file: /cvs/src/sys/sys/buf.h,v retrieving revision 1.20 diff -u -r1.20 buf.h --- sys/buf.h 12 Aug 2005 00:17:26 -0000 1.20 +++ sys/buf.h 16 Nov 2005 18:27:31 -0000 @@ -59,10 +59,12 @@ #ifndef _SYS_TREE_H_ #include <sys/tree.h> #endif - #ifndef _SYS_BIO_H_ #include <sys/bio.h> #endif
+#ifndef _SYS_SPINLOCK_H_ +#include <sys/spinlock.h> +#endif struct buf; struct mount; @@ -276,7 +278,7 @@ /* * Buffer locking. See sys/buf2.h for inline functions. */ -extern struct lwkt_token buftimetoken; /* Interlock on setting prio and timo */ +extern struct spinlock buftimespinlock; /* Interlock on setting prio and timo */ extern char *buf_wmesg; /* Default buffer lock message */ #define BUF_WMESG "bufwait" Index: sys/buf2.h =================================================================== RCS file: /cvs/src/sys/sys/buf2.h,v retrieving revision 1.10 diff -u -r1.10 buf2.h --- sys/buf2.h 10 Jun 2005 23:59:33 -0000 1.10 +++ sys/buf2.h 16 Nov 2005 18:39:30 -0000 @@ -48,10 +48,12 @@ #ifndef _SYS_GLOBALDATA_H_ #include <sys/globaldata.h> /* curthread */ #endif - #ifndef _SYS_THREAD2_H_ #include <sys/thread2.h> /* crit_*() functions */ #endif +#ifndef _SYS_SPINLOCK2_H_ +#include <sys/spinlock2.h> /* spin_*() functions */ +#endif /* * Initialize a lock. @@ -66,17 +68,14 @@ static __inline int BUF_LOCK(struct buf *bp, int locktype) { - lwkt_tokref ilock; int ret; - crit_enter(); - lwkt_gettoken(&ilock, &buftimetoken); - locktype |= LK_INTERLOCK; + spin_lock(&buftimespinlock); bp->b_lock.lk_wmesg = buf_wmesg; bp->b_lock.lk_prio = 0; /* tsleep flags */ /* bp->b_lock.lk_timo = 0; not necessary */ - ret = lockmgr(&(bp)->b_lock, locktype, &ilock, curthread); - crit_exit(); + ret = lockmgr(&(bp)->b_lock, locktype | LK_INTERLOCK, + &buftimespinlock, curthread); return ret; } /* @@ -85,17 +84,14 @@ static __inline int BUF_TIMELOCK(struct buf *bp, int locktype, char *wmesg, int catch, int timo) { - lwkt_tokref ilock; int ret; - crit_enter(); - lwkt_gettoken(&ilock, &buftimetoken); - locktype |= LK_INTERLOCK | LK_TIMELOCK; + spin_lock(&buftimespinlock); bp->b_lock.lk_wmesg = wmesg; bp->b_lock.lk_prio = catch; /* tsleep flags */ bp->b_lock.lk_timo = timo; - ret = lockmgr(&(bp)->b_lock, locktype, &ilock, curthread); - crit_exit(); + ret = lockmgr(&(bp)->b_lock, locktype | LK_INTERLOCK | LK_TIMELOCK, + &buftimespinlock, curthread); return ret; }
/* @@ -105,9 +101,7 @@ static __inline void BUF_UNLOCK(struct buf *bp) { - crit_enter(); lockmgr(&(bp)->b_lock, LK_RELEASE, NULL, curthread); - crit_exit(); } /* @@ -134,12 +128,7 @@ static __inline int BUF_REFCNT(struct buf *bp) { - int ret; - - crit_enter(); - ret = lockcount(&(bp)->b_lock); - crit_exit(); - return ret; + return (lockcount(&(bp)->b_lock)); } static __inline int Index: sys/lock.h =================================================================== RCS file: /cvs/src/sys/sys/lock.h,v retrieving revision 1.10 diff -u -r1.10 lock.h --- sys/lock.h 9 Nov 2004 17:41:29 -0000 1.10 +++ sys/lock.h 16 Nov 2005 18:40:20 -0000 @@ -52,6 +52,9 @@ #ifndef _SYS_THREAD_H_ #include <sys/thread.h> /* lwkt_token */ #endif +#ifndef _SYS_SPINLOCK_H_ +#include <sys/spinlock.h> +#endif /* * The general lock structure. Provides for multiple shared locks, @@ -61,7 +64,7 @@ struct thread; struct lock { - lwkt_token lk_interlock; /* lock on remaining fields */ + struct spinlock lk_spinlock; /* lock on remaining fields */ u_int lk_flags; /* see below */ int lk_sharecount; /* # of accepted shared locks */ int lk_waitcount; /* # of processes sleeping for lock */ @@ -150,7 +153,7 @@ * Non-persistent external flags.
*/ #define LK_INTERLOCK 0x00010000 /* unlock passed simple lock after - getting lk_interlock */ + getting lk_spinlock */ #define LK_RETRY 0x00020000 /* vn_lock: retry until locked */ #define LK_NOOBJ 0x00040000 /* vget: don't create object */ #define LK_THISLAYER 0x00080000 /* vn_lock: lock/unlock only current layer */ @@ -193,7 +196,7 @@ void lockreinit (struct lock *, int prio, char *wmesg, int timo, int flags); #ifdef DEBUG_LOCKS int debuglockmgr (struct lock *, u_int flags, - struct lwkt_tokref *, struct thread *p, + struct spinlock *, struct thread *p, const char *, const char *, int); @@ -202,7 +205,7 @@ "lockmgr", __FILE__, __LINE__) #else int lockmgr (struct lock *, u_int flags, - struct lwkt_tokref *, struct thread *td); + struct spinlock *, struct thread *td); #endif void lockmgr_printinfo (struct lock *); int lockstatus (struct lock *, struct thread *); Index: sys/spinlock2.h =================================================================== RCS file: /cvs/src/sys/sys/spinlock2.h,v retrieving revision 1.3 diff -u -r1.3 spinlock2.h --- sys/spinlock2.h 10 Nov 2005 00:53:23 -0000 1.3 +++ sys/spinlock2.h 16 Nov 2005 18:43:05 -0000 @@ -122,7 +122,7 @@ static __inline void spin_lock(struct spinlock *mtx) { - crit_enter(); + crit_enter_id("spin"); spin_lock_quick(mtx); } @@ -130,7 +130,7 @@ spin_unlock(struct spinlock *mtx) { spin_unlock_quick(mtx); - crit_exit(); + crit_exit_id("spin"); } #endif Index: sys/systm.h =================================================================== RCS file: /cvs/src/sys/sys/systm.h,v retrieving revision 1.33 diff -u -r1.33 systm.h --- sys/systm.h 14 Nov 2005 18:50:11 -0000 1.33 +++ sys/systm.h 16 Nov 2005 18:21:44 -0000 @@ -297,6 +297,7 @@ * less often. 
*/ int tsleep (void *chan, int slpflags, const char *wmesg, int timo); +void tsleep_interlock (void *chan); void tstop (struct proc *); void wakeup (void *chan); void wakeup_one (void *chan); Index: vfs/ntfs/ntfs_inode.h =================================================================== RCS file: /cvs/src/sys/vfs/ntfs/ntfs_inode.h,v retrieving revision 1.5 diff -u -r1.5 ntfs_inode.h --- vfs/ntfs/ntfs_inode.h 28 Aug 2004 19:02:21 -0000 1.5 +++ vfs/ntfs/ntfs_inode.h 16 Nov 2005 18:38:12 -0000 @@ -70,7 +70,7 @@ /* locking */ struct lock i_lock; - struct lwkt_token i_interlock; + struct spinlock i_interlock; int i_usecount; LIST_HEAD(,fnode) i_fnlist; Index: vfs/ntfs/ntfs_subr.c =================================================================== RCS file: /cvs/src/sys/vfs/ntfs/ntfs_subr.c,v retrieving revision 1.15 diff -u -r1.15 ntfs_subr.c --- vfs/ntfs/ntfs_subr.c 2 Aug 2005 13:03:55 -0000 1.15 +++ vfs/ntfs/ntfs_subr.c 16 Nov 2005 18:35:42 -0000 @@ -350,8 +350,7 @@ ip->i_number, ip, ip->i_usecount)); ip->i_usecount++; /* ZZZ */ - lwkt_gettoken(&ilock, &ip->i_interlock); - LOCKMGR(&ip->i_lock, LK_EXCLUSIVE | LK_INTERLOCK, &ilock); + LOCKMGR(&ip->i_lock, LK_EXCLUSIVE, NULL); return 0; } @@ -396,7 +395,7 @@ /* init lock and lock the newborn ntnode */ lockinit(&ip->i_lock, 0, "ntnode", 0, LK_EXCLUSIVE); - lwkt_token_init(&ip->i_interlock); + spin_init(&ip->i_interlock); ntfs_ntget(ip); ntfs_nthashins(ip); @@ -421,38 +420,44 @@ ntfs_ntput(struct ntnode *ip) { struct ntvattr *vap; - lwkt_tokref ilock; dprintf(("ntfs_ntput: rele ntnode %"PRId64": %p, usecount: %d\n", ip->i_number, ip, ip->i_usecount)); - lwkt_gettoken(&ilock, &ip->i_interlock); + spin_lock(&ip->i_interlock); ip->i_usecount--; #ifdef DIAGNOSTIC if (ip->i_usecount < 0) { + spin_unlock(&ip->i_interlock); panic("ntfs_ntput: ino: %"PRId64" usecount: %d \n", ip->i_number,ip->i_usecount); } #endif if (ip->i_usecount > 0) { - LOCKMGR(&ip->i_lock, LK_RELEASE|LK_INTERLOCK, &ilock); + LOCKMGR(&ip->i_lock, 
LK_RELEASE|LK_INTERLOCK, &ip->i_interlock); return; } dprintf(("ntfs_ntput: deallocating ntnode: %"PRId64"\n", ip->i_number)); - if (ip->i_fnlist.lh_first) + if (ip->i_fnlist.lh_first) { + spin_unlock(&ip->i_interlock); panic("ntfs_ntput: ntnode has fnodes\n"); + } + /* + * XXX this is a bit iffy because we are making high level calls + * while holding a spinlock. + */ ntfs_nthashrem(ip); while ((vap = LIST_FIRST(&ip->i_valist)) != NULL) { LIST_REMOVE(vap,va_list); ntfs_freentvattr(vap); } - lwkt_reltoken(&ilock); + spin_unlock(&ip->i_interlock); vrele(ip->i_devvp); FREE(ip, M_NTFSNTNODE); } @@ -481,14 +486,15 @@ dprintf(("ntfs_ntrele: rele ntnode %"PRId64": %p, usecount: %d\n", ip->i_number, ip, ip->i_usecount)); - lwkt_gettoken(&ilock, &ip->i_interlock); + spin_lock(&ip->i_interlock); ip->i_usecount--; if (ip->i_usecount < 0) { + spin_unlock(&ip->i_interlock); panic("ntfs_ntrele: ino: %"PRId64" usecount: %d \n", ip->i_number,ip->i_usecount); } - lwkt_reltoken(&ilock); + spin_unlock(&ip->i_interlock); } /* Index: vm/vm_map.h =================================================================== RCS file: /cvs/src/sys/vm/vm_map.h,v retrieving revision 1.16 diff -u -r1.16 vm_map.h --- vm/vm_map.h 9 Oct 2005 20:12:34 -0000 1.16 +++ vm/vm_map.h 16 Nov 2005 18:25:03 -0000 @@ -343,21 +343,6 @@ lockmgr(&(map)->lock, LK_DOWNGRADE, NULL, curthread) #endif -#define vm_map_set_recursive(map) \ - do { \ - lwkt_tokref ilock; \ - lwkt_gettoken(&ilock, &(map)->lock.lk_interlock); \ - (map)->lock.lk_flags |= LK_CANRECURSE; \ - lwkt_reltoken(&ilock); \ - } while(0) -#define vm_map_clear_recursive(map) \ - do { \ - lwkt_tokref ilock; \ - lwkt_gettoken(&ilock, &(map)->lock.lk_interlock); \ - (map)->lock.lk_flags &= ~LK_CANRECURSE; \ - lwkt_reltoken(&ilock); \ - } while(0) - #endif /* _KERNEL */ /*