diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 55b5207..e7fdd5c 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -1347,7 +1347,9 @@ brelse(struct buf *bp) } /* - * We must clear B_RELBUF if B_DELWRI or B_LOCKED is set. + * We must clear B_RELBUF if B_DELWRI or B_LOCKED is set, or + * if b_refs is non-zero. + * * If vfs_vmio_release() is called with either bit set, the * underlying pages may wind up getting freed causing a previous * write (bdwrite()) to get 'lost' because pages associated with @@ -1359,12 +1361,18 @@ brelse(struct buf *bp) * chance to countermand the release by setting B_LOCKED. * * We still allow the B_INVAL case to call vfs_vmio_release(), even - * if B_DELWRI is set. + * if B_DELWRI or B_LOCKED is set, or if b_refs is non-zero. In + * this case a copyin/copyout operation will cause a seg-fault e.g. + * if the file underlying a mmap() is truncated during the read or + * write operation. + * + * We still allow the B_NOCACHE case as well. Again, b_refs only + * prevents the buffer from being reused, not from being invalidated. * * If B_DELWRI is not set we may have to set B_RELBUF if we are low * on pages to return pages to the VM page queues. */ - if (bp->b_flags & (B_DELWRI | B_LOCKED)) { + if ((bp->b_flags & (B_DELWRI | B_LOCKED)) || bp->b_refs) { bp->b_flags &= ~B_RELBUF; } else if (vm_page_count_severe()) { if (LIST_FIRST(&bp->b_dep) != NULL) @@ -1720,6 +1728,34 @@ bqrelse(struct buf *bp) } /* + * Hold a buffer, preventing it from being reused. This will prevent + * normal B_RELBUF operations on the buffer but will not prevent B_INVAL + * operations. If a B_INVAL operation occurs the buffer will remain held + * but the underlying pages may get ripped out. + * + * These functions are typically used in VOP_READ/VOP_WRITE functions + * to hold a buffer during a copyin or copyout, preventing deadlocks + * or recursive lock panics when read()/write() is used over mmap()'d + * space. + * + * NOTE: bqhold() requires that the buffer be locked at the time of the + * hold. bqdrop() has no requirements other than the buffer having + * previously been held. + */ +void +bqhold(struct buf *bp) +{ + atomic_add_int(&bp->b_refs, 1); +} + +void +bqdrop(struct buf *bp) +{ + KKASSERT(bp->b_refs > 0); + atomic_add_int(&bp->b_refs, -1); +} + +/* * vfs_vmio_release: * * Return backing pages held by the buffer 'bp' back to the VM system @@ -2790,12 +2826,6 @@ findblk(struct vnode *vp, off_t loffset, int flags) return(bp); } -void -unrefblk(struct buf *bp) -{ - atomic_subtract_int(&bp->b_refs, 1); -} - /* * getcacheblk: * @@ -2902,7 +2932,7 @@ loop: */ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { if (blkflags & GETBLK_NOWAIT) { - unrefblk(bp); + bqdrop(bp); return(NULL); } lkflags = LK_EXCLUSIVE | LK_SLEEPFAIL; @@ -2910,14 +2940,14 @@ loop: lkflags |= LK_PCATCH; error = BUF_TIMELOCK(bp, lkflags, "getblk", slptimeo); if (error) { - unrefblk(bp); + bqdrop(bp); if (error == ENOLCK) goto loop; return (NULL); } /* buffer may have changed on us */ } - unrefblk(bp); + bqdrop(bp); /* * Once the buffer has been locked, make sure we didn't race diff --git a/sys/sys/buf.h b/sys/sys/buf.h index 5c2a0d6..e9ec0f7 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -177,7 +177,7 @@ struct buf { int b_kvasize; /* size of kva for buffer */ int b_dirtyoff; /* Offset in buffer of dirty region. */ int b_dirtyend; /* Offset of end of dirty region. */ - int b_refs; /* FINDBLK_REF / unrefblk() */ + int b_refs; /* FINDBLK_REF / bqdrop() */ struct xio b_xio; /* data buffer page list management */ struct bio_ops *b_ops; /* bio_ops used w/ b_dep */ struct workhead b_dep; /* List of filesystem dependencies. */ @@ -424,7 +424,8 @@ struct buf *findblk (struct vnode *, off_t, int); struct buf *getblk (struct vnode *, off_t, int, int, int); struct buf *getcacheblk (struct vnode *, off_t); struct buf *geteblk (int); -void unrefblk(struct buf *bp); +void bqhold(struct buf *bp); +void bqdrop(struct buf *bp); void regetblk(struct buf *bp); struct bio *push_bio(struct bio *); struct bio *pop_bio(struct bio *); diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c index 4d8cdae..78be315 100644 --- a/sys/vfs/hammer/hammer_vnops.c +++ b/sys/vfs/hammer/hammer_vnops.c @@ -424,11 +424,20 @@ skip: n = uio->uio_resid; if (n > ip->ino_data.size - uio->uio_offset) n = (int)(ip->ino_data.size - uio->uio_offset); - error = uiomove((char *)bp->b_data + offset, n, uio); - /* data has a lower priority then meta-data */ + /* + * Set B_AGE, data has a lower priority than meta-data. + * + * Use a hold/unlock/drop sequence to run the uiomove + * with the buffer unlocked, which avoids deadlocks + * when read() is used on mmap()'d space. + */ bp->b_flags |= B_AGE; + bqhold(bp); bqrelse(bp); + error = uiomove((char *)bp->b_data + offset, n, uio); + bqdrop(bp); + if (error) break; hammer_stats_file_read += n;