mirror of
https://git.FreeBSD.org/src.git
synced 2024-12-03 09:00:21 +00:00
buf: Add B_INVALONERR flag to discard data
Setting the B_INVALONERR flag before a synchronous write causes the buf cache to forcibly invalidate contents if the write fails (BIO_ERROR). This is intended to be used to allow layers above the buffer cache to make more informed decisions about when discarding dirty buffers without successful write is acceptable. As a proof of concept, use in msdosfs to handle failures to mark the on-disk 'dirty' bit during rw mount or ro->rw update. Extending this to other filesystems is left as future work. PR: 210316 Reviewed by: kib (with objections) Sponsored by: Dell EMC Isilon Differential Revision: https://reviews.freebsd.org/D21539
This commit is contained in:
parent
8d910a4282
commit
aaa3852435
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=352233
@ -103,7 +103,13 @@ int fatentry(int function, struct msdosfsmount *pmp, u_long cluster, u_long *old
|
||||
int freeclusterchain(struct msdosfsmount *pmp, u_long startchain);
|
||||
int extendfile(struct denode *dep, u_long count, struct buf **bpp, u_long *ncp, int flags);
|
||||
void fc_purge(struct denode *dep, u_int frcn);
|
||||
int markvoldirty(struct msdosfsmount *pmp, int dirty);
|
||||
int markvoldirty_upgrade(struct msdosfsmount *pmp, bool dirty, bool rw_upgrade);
|
||||
|
||||
static inline int
|
||||
markvoldirty(struct msdosfsmount *pmp, bool dirty)
|
||||
{
|
||||
return (markvoldirty_upgrade(pmp, dirty, false));
|
||||
}
|
||||
|
||||
#endif /* _KERNEL || MAKEFS */
|
||||
#endif /* !_FS_MSDOSFS_FAT_H_ */
|
||||
|
@ -1117,7 +1117,7 @@ extendfile(struct denode *dep, u_long count, struct buf **bpp, u_long *ncp,
|
||||
* ? (other errors from called routines)
|
||||
*/
|
||||
int
|
||||
markvoldirty(struct msdosfsmount *pmp, int dirty)
|
||||
markvoldirty_upgrade(struct msdosfsmount *pmp, bool dirty, bool rw_upgrade)
|
||||
{
|
||||
struct buf *bp;
|
||||
u_long bn, bo, bsize, byteoffset, fatval;
|
||||
@ -1130,8 +1130,11 @@ markvoldirty(struct msdosfsmount *pmp, int dirty)
|
||||
if (FAT12(pmp))
|
||||
return (0);
|
||||
|
||||
/* Can't change the bit on a read-only filesystem. */
|
||||
if (pmp->pm_flags & MSDOSFSMNT_RONLY)
|
||||
/*
|
||||
* Can't change the bit on a read-only filesystem, except as part of
|
||||
* ro->rw upgrade.
|
||||
*/
|
||||
if ((pmp->pm_flags & MSDOSFSMNT_RONLY) != 0 && !rw_upgrade)
|
||||
return (EROFS);
|
||||
|
||||
/*
|
||||
@ -1167,6 +1170,29 @@ markvoldirty(struct msdosfsmount *pmp, int dirty)
|
||||
putushort(&bp->b_data[bo], fatval);
|
||||
}
|
||||
|
||||
/*
|
||||
* The concern here is that a devvp may be readonly, without reporting
|
||||
* itself as such through the usual channels. In that case, we'd like
|
||||
* it if attempting to mount msdosfs rw didn't panic the system.
|
||||
*
|
||||
* markvoldirty is invoked as the first write on backing devvps when
|
||||
* either msdosfs is mounted for the first time, or a ro mount is
|
||||
* upgraded to rw.
|
||||
*
|
||||
* In either event, if a write error occurs dirtying the volume:
|
||||
* - No user data has been permitted to be written to cache yet.
|
||||
* - We can abort the high-level operation (mount, or ro->rw) safely.
|
||||
* - We don't derive any benefit from leaving a zombie dirty buf in
|
||||
* the cache that can not be cleaned or evicted.
|
||||
*
|
||||
* So, mark B_INVALONERR to have bwrite() -> brelse() detect that
|
||||
* condition and force-invalidate our write to the block if it occurs.
|
||||
*
|
||||
* PR 210316 provides more context on the discovery and diagnosis of
|
||||
* the problem, as well as earlier attempts to solve it.
|
||||
*/
|
||||
bp->b_flags |= B_INVALONERR;
|
||||
|
||||
/* Write out the modified FAT block synchronously. */
|
||||
return (bwrite(bp));
|
||||
}
|
||||
|
@ -311,16 +311,25 @@ msdosfs_mount(struct mount *mp)
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
/* Now that the volume is modifiable, mark it dirty. */
|
||||
error = markvoldirty_upgrade(pmp, true, true);
|
||||
if (error) {
|
||||
/*
|
||||
* If dirtying the superblock failed, drop GEOM
|
||||
* 'w' refs (we're still RO).
|
||||
*/
|
||||
g_topology_lock();
|
||||
(void)g_access(pmp->pm_cp, 0, -1, 0);
|
||||
g_topology_unlock();
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
pmp->pm_fmod = 1;
|
||||
pmp->pm_flags &= ~MSDOSFSMNT_RONLY;
|
||||
MNT_ILOCK(mp);
|
||||
mp->mnt_flag &= ~MNT_RDONLY;
|
||||
MNT_IUNLOCK(mp);
|
||||
|
||||
/* Now that the volume is modifiable, mark it dirty. */
|
||||
error = markvoldirty(pmp, 1);
|
||||
if (error)
|
||||
return (error);
|
||||
}
|
||||
}
|
||||
/*
|
||||
@ -701,10 +710,8 @@ mountmsdosfs(struct vnode *devvp, struct mount *mp)
|
||||
if (ronly)
|
||||
pmp->pm_flags |= MSDOSFSMNT_RONLY;
|
||||
else {
|
||||
if ((error = markvoldirty(pmp, 1)) != 0) {
|
||||
(void)markvoldirty(pmp, 0);
|
||||
if ((error = markvoldirty(pmp, 1)) != 0)
|
||||
goto error_exit;
|
||||
}
|
||||
pmp->pm_fmod = 1;
|
||||
}
|
||||
mp->mnt_data = pmp;
|
||||
|
@ -2614,6 +2614,19 @@ brelse(struct buf *bp)
|
||||
BO_UNLOCK(bp->b_bufobj);
|
||||
bdirty(bp);
|
||||
}
|
||||
|
||||
if (bp->b_iocmd == BIO_WRITE && (bp->b_ioflags & BIO_ERROR) &&
|
||||
(bp->b_flags & B_INVALONERR)) {
|
||||
/*
|
||||
* Forced invalidation of dirty buffer contents, to be used
|
||||
* after a failed write in the rare case that the loss of the
|
||||
* contents is acceptable. The buffer is invalidated and
|
||||
* freed.
|
||||
*/
|
||||
bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
|
||||
bp->b_flags &= ~(B_ASYNC | B_CACHE);
|
||||
}
|
||||
|
||||
if (bp->b_iocmd == BIO_WRITE && (bp->b_ioflags & BIO_ERROR) &&
|
||||
(bp->b_error != ENXIO || !LIST_EMPTY(&bp->b_dep)) &&
|
||||
!(bp->b_flags & B_INVAL)) {
|
||||
|
@ -196,6 +196,11 @@ struct buf {
|
||||
* may not be used with the stage 1 data write under NFS
|
||||
* but may be used for the commit rpc portion.
|
||||
*
|
||||
* B_INVALONERR This flag is set on dirty buffers. It specifies that a
|
||||
* write error should forcibly invalidate the buffer
|
||||
* contents. This flag should be used with caution, as it
|
||||
* discards data. It is incompatible with B_ASYNC.
|
||||
*
|
||||
* B_VMIO Indicates that the buffer is tied into an VM object.
|
||||
* The buffer's data is always PAGE_SIZE aligned even
|
||||
* if b_bufsize and b_bcount are not. ( b_bufsize is
|
||||
@ -226,7 +231,7 @@ struct buf {
|
||||
#define B_NOCACHE 0x00008000 /* Do not cache block after use. */
|
||||
#define B_MALLOC 0x00010000 /* malloced b_data */
|
||||
#define B_CLUSTEROK 0x00020000 /* Pagein op, so swap() can count it. */
|
||||
#define B_00040000 0x00040000 /* Available flag. */
|
||||
#define B_INVALONERR 0x00040000 /* Invalidate on write error. */
|
||||
#define B_00080000 0x00080000 /* Available flag. */
|
||||
#define B_00100000 0x00100000 /* Available flag. */
|
||||
#define B_00200000 0x00200000 /* Available flag. */
|
||||
@ -243,7 +248,7 @@ struct buf {
|
||||
|
||||
#define PRINT_BUF_FLAGS "\20\40remfree\37cluster\36vmio\35ram\34managed" \
|
||||
"\33paging\32infreecnt\31nocopy\30b23\27relbuf\26b21\25b20" \
|
||||
"\24b19\23b18\22clusterok\21malloc\20nocache\17b14\16inval" \
|
||||
"\24b19\23invalonerr\22clusterok\21malloc\20nocache\17b14\16inval" \
|
||||
"\15reuse\14noreuse\13eintr\12done\11b8\10delwri" \
|
||||
"\7validsuspwrt\6cache\5deferred\4direct\3async\2needcommit\1age"
|
||||
|
||||
|
@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/errno.h>
|
||||
#include <sys/vnode.h>
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
@ -54,6 +54,7 @@
|
||||
#include <sys/errno.h>
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <strings.h>
|
||||
|
@ -53,6 +53,7 @@
|
||||
#include <sys/param.h>
|
||||
#include <sys/errno.h>
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
|
@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/mount.h>
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
Loading…
Reference in New Issue
Block a user