From eef33ce9bdc93d8fec1e9782602ba39ce04edf62 Mon Sep 17 00:00:00 2001 From: Kirk McKusick Date: Tue, 2 Mar 1999 04:04:31 +0000 Subject: [PATCH] When fsync'ing a file on a filesystem using soft updates, we first try to write all the dirty blocks. If some of those blocks have dependencies, they will be re-marked dirty when the I/O completes. On machines with really fast I/O systems, it is possible to get into an infinite loop trying to flush the buffers, because the I/O finishes before we can get all the dirty buffers off the v_dirtyblkhd list and into the I/O queue. (The previous algorithm looped over the v_dirtyblkhd list writing out buffers until the list emptied.) So, now we mark each buffer that we try to write so that we can distinguish the ones that are being re-marked dirty from those that we have not yet tried to flush. Once we have tried to push every buffer once, we then push any associated metadata that is causing the remaining buffers to be redirtied. Submitted by: Matthew Dillon --- sys/sys/bio.h | 4 ++-- sys/sys/buf.h | 4 ++-- sys/ufs/ffs/ffs_vnops.c | 28 ++++++++++++++++++---------- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/sys/sys/bio.h b/sys/sys/bio.h index 58b309b232e..5ce4039ce90 100644 --- a/sys/sys/bio.h +++ b/sys/sys/bio.h @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)buf.h 8.9 (Berkeley) 3/30/95 - * $Id: buf.h,v 1.62 1999/01/21 08:29:08 dillon Exp $ + * $Id: buf.h,v 1.63 1999/01/21 13:41:12 peter Exp $ */ #ifndef _SYS_BUF_H_ @@ -164,7 +164,7 @@ struct buf { #define B_DONE 0x00000200 /* I/O completed. */ #define B_EINTR 0x00000400 /* I/O was interrupted */ #define B_ERROR 0x00000800 /* I/O error occurred. */ -#define B_AVAIL2 0x00001000 /* Available flag */ +#define B_SCANNED 0x00001000 /* VOP_FSYNC funcs mark written bufs */ #define B_INVAL 0x00002000 /* Does not contain valid info. */ #define B_LOCKED 0x00004000 /* Locked in core (not reusable). */ #define B_NOCACHE 0x00008000 /* Do not cache block after use. 
*/ diff --git a/sys/sys/buf.h b/sys/sys/buf.h index 58b309b232e..5ce4039ce90 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)buf.h 8.9 (Berkeley) 3/30/95 - * $Id: buf.h,v 1.62 1999/01/21 08:29:08 dillon Exp $ + * $Id: buf.h,v 1.63 1999/01/21 13:41:12 peter Exp $ */ #ifndef _SYS_BUF_H_ @@ -164,7 +164,7 @@ struct buf { #define B_DONE 0x00000200 /* I/O completed. */ #define B_EINTR 0x00000400 /* I/O was interrupted */ #define B_ERROR 0x00000800 /* I/O error occurred. */ -#define B_AVAIL2 0x00001000 /* Available flag */ +#define B_SCANNED 0x00001000 /* VOP_FSYNC funcs mark written bufs */ #define B_INVAL 0x00002000 /* Does not contain valid info. */ #define B_LOCKED 0x00004000 /* Locked in core (not reusable). */ #define B_NOCACHE 0x00008000 /* Do not cache block after use. */ diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index 1823c0f0af1..c44b1333541 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)ffs_vnops.c 8.15 (Berkeley) 5/14/95 - * $Id: ffs_vnops.c,v 1.53 1998/10/31 15:31:27 peter Exp $ + * $Id: ffs_vnops.c,v 1.54 1999/01/07 16:14:17 bde Exp $ */ #include @@ -142,9 +142,11 @@ ffs_fsync(ap) skipmeta = 0; if (ap->a_waitfor == MNT_WAIT) skipmeta = 1; -loop: s = splbio(); -loop2: +loop: + for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; + bp = TAILQ_NEXT(bp, b_vnbufs)) + bp->b_flags &= ~B_SCANNED; for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); /* @@ -152,7 +154,7 @@ loop2: * or if it's already scheduled, skip to the next * buffer */ - if ((bp->b_flags & B_BUSY) || + if ((bp->b_flags & (B_BUSY | B_SCANNED)) || ((skipmeta == 1) && (bp->b_lblkno < 0))) continue; if ((bp->b_flags & B_DELWRI) == 0) @@ -162,6 +164,7 @@ loop2: * asked to wait for everything, or it's not a file or BDEV, * start the IO on this buffer immediatly. 
*/ + bp->b_flags |= B_SCANNED; if (((bp->b_vp != vp) || (ap->a_waitfor == MNT_WAIT)) || ((vp->v_type != VREG) && (vp->v_type != VBLK))) { @@ -174,18 +177,19 @@ loop2: if ((bp->b_flags & B_CLUSTEROK) && ap->a_waitfor != MNT_WAIT) { (void) vfs_bio_awrite(bp); - splx(s); } else { bremfree(bp); bp->b_flags |= B_BUSY; splx(s); (void) bawrite(bp); + s = splbio(); } } else { bremfree(bp); bp->b_flags |= B_BUSY; splx(s); (void) bwrite(bp); + s = splbio(); } } else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) { /* @@ -194,13 +198,17 @@ loop2: */ bremfree(bp); bp->b_flags |= B_BUSY | B_INVAL | B_NOCACHE; - brelse(bp); splx(s); + brelse(bp); + s = splbio(); } else { vfs_bio_awrite(bp); - splx(s); } - goto loop; + /* + * Since we may have slept during the I/O, we need + * to start from a known point. + */ + nbp = TAILQ_FIRST(&vp->v_dirtyblkhd); } /* * If we were asked to do this synchronously, then go back for @@ -208,7 +216,7 @@ loop2: */ if (skipmeta) { skipmeta = 0; - goto loop2; /* stay within the splbio() */ + goto loop; } if (ap->a_waitfor == MNT_WAIT) { @@ -238,7 +246,7 @@ loop2: */ if (passes > 0) { passes -= 1; - goto loop2; + goto loop; } #ifdef DIAGNOSTIC if (vp->v_type != VBLK)