1
0
mirror of https://git.FreeBSD.org/src.git synced 2024-12-21 11:13:30 +00:00

Update to soft updates journaling to properly track freed blocks that get claimed by snapshots.

Submitted by:	Jeff Roberson
Tested by:	Peter Holm
This commit is contained in:
Kirk McKusick 2011-06-12 19:27:05 +00:00
parent 8e6cab54e8
commit 9eb8728aa5
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=223020
5 changed files with 180 additions and 30 deletions

View File

@ -2035,7 +2035,7 @@ ffs_blkfree(ump, fs, devvp, bno, size, inum, dephd)
*/
if (devvp->v_type != VREG &&
(devvp->v_vflag & VV_COPYONWRITE) &&
ffs_snapblkfree(fs, devvp, bno, size, inum)) {
ffs_snapblkfree(fs, devvp, bno, size, inum, dephd)) {
return;
}
if (!ump->um_candelete) {

View File

@ -80,12 +80,14 @@ int ffs_realloccg(struct inode *, ufs2_daddr_t, ufs2_daddr_t,
ufs2_daddr_t, int, int, int, struct ucred *, struct buf **);
int ffs_sbupdate(struct ufsmount *, int, int);
void ffs_setblock(struct fs *, u_char *, ufs1_daddr_t);
int ffs_snapblkfree(struct fs *, struct vnode *, ufs2_daddr_t, long, ino_t);
int ffs_snapblkfree(struct fs *, struct vnode *, ufs2_daddr_t, long, ino_t,
struct workhead *);
void ffs_snapremove(struct vnode *vp);
int ffs_snapshot(struct mount *mp, char *snapfile);
void ffs_snapshot_mount(struct mount *mp);
void ffs_snapshot_unmount(struct mount *mp);
void process_deferred_inactive(struct mount *mp);
void ffs_sync_snap(struct mount *, int);
int ffs_syncvnode(struct vnode *vp, int waitfor);
int ffs_truncate(struct vnode *, off_t, int, struct ucred *, struct thread *);
int ffs_update(struct vnode *, int);
@ -149,6 +151,9 @@ int softdep_prealloc(struct vnode *, int);
int softdep_journal_lookup(struct mount *, struct vnode **);
void softdep_journal_freeblocks(struct inode *, struct ucred *, off_t, int);
void softdep_journal_fsync(struct inode *);
void softdep_buf_append(struct buf *, struct workhead *);
void softdep_inode_append(struct inode *, struct ucred *, struct workhead *);
void softdep_freework(struct workhead *);
/*
@ -161,4 +166,14 @@ void softdep_journal_fsync(struct inode *);
int ffs_rdonly(struct inode *);
/* Head type for a tail queue of inodes; used for sn_head below. */
TAILQ_HEAD(snaphead, inode);
/*
 * Snapshot bookkeeping record.  ffs_sync_snap() obtains it from the device's
 * si_snapdata pointer, locks sn_lock and walks sn_head.
 */
struct snapdata {
/* Linkage for keeping snapdata records on a LIST. */
LIST_ENTRY(snapdata) sn_link;
/* Tail queue of inodes, linked through i_nextsnap. */
struct snaphead sn_head;
/* NOTE(review): presumably the number of entries in sn_blklist - confirm. */
daddr_t sn_listsize;
/* NOTE(review): block list whose exact meaning is not visible here - confirm. */
daddr_t *sn_blklist;
/* lockmgr lock; ffs_sync_snap() holds it exclusively while walking sn_head. */
struct lock sn_lock;
};
#endif /* !_UFS_FFS_EXTERN_H */

View File

@ -81,12 +81,13 @@ ffs_snapshot(mp, snapfile)
}
int
ffs_snapblkfree(fs, devvp, bno, size, inum)
ffs_snapblkfree(fs, devvp, bno, size, inum, wkhd)
struct fs *fs;
struct vnode *devvp;
ufs2_daddr_t bno;
long size;
ino_t inum;
struct workhead *wkhd;
{
return (EINVAL);
}
@ -123,19 +124,16 @@ ffs_copyonwrite(devvp, bp)
return (EINVAL);
}
/*
 * No-op variant of ffs_sync_snap(), compiled in the configuration that
 * does not include the full snapshot implementation found after the
 * #else.  Both arguments are ignored.
 */
void
ffs_sync_snap(struct mount *mp, int waitfor)
{
}
#else
FEATURE(ffs_snapshot, "FFS snapshot support");
/* Head type for a tail queue of inodes; used for sn_head below. */
TAILQ_HEAD(snaphead, inode);
/*
 * Snapshot bookkeeping record.  ffs_sync_snap() obtains it from the device's
 * si_snapdata pointer, locks sn_lock and walks sn_head.
 */
struct snapdata {
/* Linkage for keeping snapdata records on a LIST. */
LIST_ENTRY(snapdata) sn_link;
/* Tail queue of inodes, linked through i_nextsnap. */
struct snaphead sn_head;
/* NOTE(review): presumably the number of entries in sn_blklist - confirm. */
daddr_t sn_listsize;
/* NOTE(review): block list whose exact meaning is not visible here - confirm. */
daddr_t *sn_blklist;
/* lockmgr lock; ffs_sync_snap() holds it exclusively while walking sn_head. */
struct lock sn_lock;
};
LIST_HEAD(, snapdata) snapfree;
static struct mtx snapfree_lock;
MTX_SYSINIT(ffs_snapfree, &snapfree_lock, "snapdata free list", MTX_DEF);
@ -1635,7 +1633,7 @@ ffs_snapremove(vp)
DIP_SET(ip, i_db[blkno], 0);
else if ((dblk == blkstofrags(fs, blkno) &&
ffs_snapblkfree(fs, ip->i_devvp, dblk, fs->fs_bsize,
ip->i_number))) {
ip->i_number, NULL))) {
DIP_SET(ip, i_blocks, DIP(ip, i_blocks) -
btodb(fs->fs_bsize));
DIP_SET(ip, i_db[blkno], 0);
@ -1660,7 +1658,7 @@ ffs_snapremove(vp)
((ufs1_daddr_t *)(ibp->b_data))[loc]= 0;
else if ((dblk == blkstofrags(fs, blkno) &&
ffs_snapblkfree(fs, ip->i_devvp, dblk,
fs->fs_bsize, ip->i_number))) {
fs->fs_bsize, ip->i_number, NULL))) {
ip->i_din1->di_blocks -=
btodb(fs->fs_bsize);
((ufs1_daddr_t *)(ibp->b_data))[loc]= 0;
@ -1674,7 +1672,7 @@ ffs_snapremove(vp)
((ufs2_daddr_t *)(ibp->b_data))[loc] = 0;
else if ((dblk == blkstofrags(fs, blkno) &&
ffs_snapblkfree(fs, ip->i_devvp, dblk,
fs->fs_bsize, ip->i_number))) {
fs->fs_bsize, ip->i_number, NULL))) {
ip->i_din2->di_blocks -= btodb(fs->fs_bsize);
((ufs2_daddr_t *)(ibp->b_data))[loc] = 0;
}
@ -1722,12 +1720,13 @@ ffs_snapremove(vp)
* must always have been allocated from a BLK_NOCOPY location.
*/
int
ffs_snapblkfree(fs, devvp, bno, size, inum)
ffs_snapblkfree(fs, devvp, bno, size, inum, wkhd)
struct fs *fs;
struct vnode *devvp;
ufs2_daddr_t bno;
long size;
ino_t inum;
struct workhead *wkhd;
{
struct buf *ibp, *cbp, *savedcbp = 0;
struct thread *td = curthread;
@ -1825,6 +1824,17 @@ ffs_snapblkfree(fs, devvp, bno, size, inum)
"Grabonremove: snapino", ip->i_number,
(intmax_t)lbn, inum);
#endif
/*
* If journaling is tracking this write we must add
* the work to the inode or indirect being written.
*/
if (wkhd != NULL) {
if (lbn < NDADDR)
softdep_inode_append(ip,
curthread->td_ucred, wkhd);
else
softdep_buf_append(ibp, wkhd);
}
if (lbn < NDADDR) {
DIP_SET(ip, i_db[lbn], bno);
} else if (ip->i_ump->um_fstype == UFS1) {
@ -1902,6 +1912,8 @@ ffs_snapblkfree(fs, devvp, bno, size, inum)
* not be freed. Although space will be lost, the snapshot
* will stay consistent.
*/
if (error != 0 && wkhd != NULL)
softdep_freework(wkhd);
lockmgr(vp->v_vnlock, LK_RELEASE, NULL);
return (error);
}
@ -2399,6 +2411,42 @@ ffs_copyonwrite(devvp, bp)
return (error);
}
/*
* sync snapshots to force freework records waiting on snapshots to claim
* blocks to free.
*/
void
ffs_sync_snap(mp, waitfor)
struct mount *mp;
int waitfor;
{
struct snapdata *sn;
struct vnode *devvp;
struct vnode *vp;
struct inode *ip;
devvp = VFSTOUFS(mp)->um_devvp;
if ((devvp->v_vflag & VV_COPYONWRITE) == 0)
return;
for (;;) {
VI_LOCK(devvp);
sn = devvp->v_rdev->si_snapdata;
if (sn == NULL) {
VI_UNLOCK(devvp);
return;
}
if (lockmgr(&sn->sn_lock,
LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL,
VI_MTX(devvp)) == 0)
break;
}
TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) {
vp = ITOV(ip);
ffs_syncvnode(vp, waitfor);
}
lockmgr(&sn->sn_lock, LK_RELEASE, NULL);
}
/*
* Read the specified block into the given buffer.
* Much of this boiler-plate comes from bwrite().

View File

@ -584,6 +584,33 @@ softdep_get_depcounts(struct mount *mp,
*softdepactiveaccp = 0;
}
/*
 * Placeholder for softdep_buf_append() in the configuration that does not
 * include the soft updates implementation found after the #else.  It must
 * never be reached, so it panics.  The message names this function so the
 * panic string can be matched to the code.
 */
void
softdep_buf_append(struct buf *bp, struct workhead *wkhd)
{

	panic("softdep_buf_append called");
}
/*
 * Placeholder for softdep_inode_append() in the configuration that does
 * not include the soft updates implementation found after the #else.  It
 * must never be reached, so it panics.  The message names this function so
 * the panic string can be matched to the code.
 */
void
softdep_inode_append(struct inode *ip, struct ucred *cred,
    struct workhead *wkhd)
{

	panic("softdep_inode_append called");
}
/*
 * Placeholder for softdep_freework() in the configuration that does not
 * include the soft updates implementation found after the #else.  It must
 * never be reached, so it panics.
 */
void
softdep_freework(struct workhead *wkhd)
{

	panic("softdep_freework called");
}
#else
FEATURE(softupdates, "FFS soft-updates support");
@ -867,7 +894,7 @@ static void freework_enqueue(struct freework *);
static int handle_workitem_freeblocks(struct freeblks *, int);
static int handle_complete_freeblocks(struct freeblks *, int);
static void handle_workitem_indirblk(struct freework *);
static void handle_written_freework(struct freework *, int);
static void handle_written_freework(struct freework *);
static void merge_inode_lists(struct allocdirectlst *,struct allocdirectlst *);
static struct worklist *jnewblk_merge(struct worklist *, struct worklist *,
struct workhead *);
@ -1632,6 +1659,7 @@ process_truncates(vp)
if (cgwait) {
FREE_LOCK(&lk);
sync_cgs(mp, MNT_WAIT);
ffs_sync_snap(mp, MNT_WAIT);
ACQUIRE_LOCK(&lk);
continue;
}
@ -5922,7 +5950,7 @@ complete_trunc_indir(freework)
*/
if (bp == NULL) {
if (LIST_EMPTY(&indirdep->ir_freeblks->fb_jblkdephd))
handle_written_freework(freework, 0);
handle_written_freework(freework);
else
WORKLIST_INSERT(&indirdep->ir_freeblks->fb_freeworkhd,
&freework->fw_list);
@ -5974,7 +6002,7 @@ blkcount(fs, datablocks, length)
*/
if (totblks > datablocks)
return (0);
return (totblks - datablocks);
return (datablocks - totblks);
}
/*
@ -7228,6 +7256,7 @@ freework_freeblock(freework)
cancel_jnewblk(jnewblk, &wkhd);
needj = 0;
} else if (needj) {
freework->fw_state |= DELAYEDFREE;
freeblks->fb_cgwait++;
WORKLIST_INSERT(&wkhd, &freework->fw_list);
}
@ -7241,7 +7270,7 @@ freework_freeblock(freework)
* made it to disk. We can immediately free the freeblk.
*/
if (needj == 0)
handle_written_freework(freework, 0);
handle_written_freework(freework);
}
/*
@ -7256,7 +7285,8 @@ freework_enqueue(freework)
struct freeblks *freeblks;
freeblks = freework->fw_freeblks;
WORKLIST_INSERT(&freeblks->fb_freeworkhd, &freework->fw_list);
if ((freework->fw_state & INPROGRESS) == 0)
WORKLIST_INSERT(&freeblks->fb_freeworkhd, &freework->fw_list);
if ((freeblks->fb_state &
(ONWORKLIST | INPROGRESS | ALLCOMPLETE)) == ALLCOMPLETE &&
LIST_EMPTY(&freeblks->fb_jblkdephd))
@ -7282,13 +7312,14 @@ handle_workitem_indirblk(freework)
ump = VFSTOUFS(freeblks->fb_list.wk_mp);
fs = ump->um_fs;
if (freework->fw_state & DEPCOMPLETE) {
handle_written_freework(freework, 0);
handle_written_freework(freework);
return;
}
if (freework->fw_off == NINDIR(fs)) {
freework_freeblock(freework);
return;
}
freework->fw_state |= INPROGRESS;
FREE_LOCK(&lk);
indir_trunc(freework, fsbtodb(fs, freework->fw_blkno),
freework->fw_lbn);
@ -7301,16 +7332,16 @@ handle_workitem_indirblk(freework)
* the freeblks is added back to the worklist if there is more work to do.
*/
static void
handle_written_freework(freework, cgwrite)
handle_written_freework(freework)
struct freework *freework;
int cgwrite;
{
struct freeblks *freeblks;
struct freework *parent;
freeblks = freework->fw_freeblks;
parent = freework->fw_parent;
freeblks->fb_cgwait -= cgwrite;
if (freework->fw_state & DELAYEDFREE)
freeblks->fb_cgwait--;
freework->fw_state |= COMPLETE;
if ((freework->fw_state & ALLCOMPLETE) == ALLCOMPLETE)
WORKITEM_FREE(freework, D_FREEWORK);
@ -7552,6 +7583,8 @@ indir_trunc(freework, dbn, lbn)
return;
}
ACQUIRE_LOCK(&lk);
/* Protects against a race with complete_trunc_indir(). */
freework->fw_state &= ~INPROGRESS;
/*
* If we have an indirdep we need to enforce the truncation order
* and discard it when it is complete.
@ -7675,7 +7708,7 @@ indir_trunc(freework, dbn, lbn)
if (freework->fw_blkno == dbn) {
freework->fw_state |= ALLCOMPLETE;
ACQUIRE_LOCK(&lk);
handle_written_freework(freework, 0);
handle_written_freework(freework);
FREE_LOCK(&lk);
}
return;
@ -10368,8 +10401,7 @@ softdep_disk_write_complete(bp)
continue;
case D_FREEWORK:
/* Freework on an indirect block, not bmsafemap. */
handle_written_freework(WK_FREEWORK(wk), 0);
handle_written_freework(WK_FREEWORK(wk));
break;
case D_JSEGDEP:
@ -10540,7 +10572,7 @@ handle_jwork(wkhd)
free_freedep(WK_FREEDEP(wk));
continue;
case D_FREEWORK:
handle_written_freework(WK_FREEWORK(wk), 1);
handle_written_freework(WK_FREEWORK(wk));
continue;
default:
panic("handle_jwork: Unknown type %s\n",
@ -12738,6 +12770,53 @@ clear_inodedeps(td)
}
}
void
softdep_buf_append(bp, wkhd)
struct buf *bp;
struct workhead *wkhd;
{
struct worklist *wk;
ACQUIRE_LOCK(&lk);
while ((wk = LIST_FIRST(wkhd)) != NULL) {
WORKLIST_REMOVE(wk);
WORKLIST_INSERT(&bp->b_dep, wk);
}
FREE_LOCK(&lk);
}
/*
 * Attach the work items on wkhd to the buffer that holds the on-disk
 * inode for ip.  The inode block is read with bread() using cred; if the
 * read fails the work is handed to softdep_freework() instead.
 */
void
softdep_inode_append(struct inode *ip, struct ucred *cred,
    struct workhead *wkhd)
{
	struct buf *inobp;
	struct fs *fs;

	fs = ip->i_fs;
	if (bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
	    (int)fs->fs_bsize, cred, &inobp) != 0) {
		/*
		 * NOTE(review): the buffer is not released on this path;
		 * confirm against the bread() contract in this tree
		 * whether a failed read leaves a buffer to release.
		 */
		softdep_freework(wkhd);
		return;
	}
	softdep_buf_append(inobp, wkhd);
	bqrelse(inobp);
}
/*
 * Hand the work list wkhd to handle_jwork(), taking lk around the call.
 */
void
softdep_freework(struct workhead *wkhd)
{

	ACQUIRE_LOCK(&lk);
	handle_jwork(wkhd);
	FREE_LOCK(&lk);
}
/*
* Function to determine if the buffer has outstanding dependencies
* that will cause a roll-back if the buffer is written. If wantcount

View File

@ -1838,6 +1838,8 @@ ufs_mkdir(ap)
#ifdef QUOTA
if ((error = getinoquota(ip)) ||
(error = chkiq(ip, 1, ucp, 0))) {
if (DOINGSOFTDEP(tvp))
softdep_revert_link(dp, ip);
UFS_VFREE(tvp, ip->i_number, dmode);
vput(tvp);
return (error);
@ -1850,6 +1852,8 @@ ufs_mkdir(ap)
#ifdef QUOTA
if ((error = getinoquota(ip)) ||
(error = chkiq(ip, 1, cnp->cn_cred, 0))) {
if (DOINGSOFTDEP(tvp))
softdep_revert_link(dp, ip);
UFS_VFREE(tvp, ip->i_number, dmode);
vput(tvp);
return (error);
@ -2608,6 +2612,8 @@ ufs_makeinode(mode, dvp, vpp, cnp)
#ifdef QUOTA
if ((error = getinoquota(ip)) ||
(error = chkiq(ip, 1, ucp, 0))) {
if (DOINGSOFTDEP(tvp))
softdep_revert_link(pdir, ip);
UFS_VFREE(tvp, ip->i_number, mode);
vput(tvp);
return (error);
@ -2620,6 +2626,8 @@ ufs_makeinode(mode, dvp, vpp, cnp)
#ifdef QUOTA
if ((error = getinoquota(ip)) ||
(error = chkiq(ip, 1, cnp->cn_cred, 0))) {
if (DOINGSOFTDEP(tvp))
softdep_revert_link(pdir, ip);
UFS_VFREE(tvp, ip->i_number, mode);
vput(tvp);
return (error);