From cf087c12c2b0a12cf2ca6e695a734351be3696d1 Mon Sep 17 00:00:00 2001 From: Peter Grehan Date: Sat, 22 Feb 2014 07:18:06 +0000 Subject: [PATCH] ZFS boot support for bhyveload. Modelled after the i386 zfsloader. However, with no 2nd stage zfsboot to search for a bootable dataset, attempt a ZFS boot if at least one ZFS dataset is found during the disk probe. sys/boot/userboot/zfs - build the ZFS boot library sys/boot/userboot/userboot/ conf.c - Add the ZFS pool and filesystem tables devicename.c - correctly format ZFS devices main.c - increase the size of the libstand malloc pool to account for the increased usage from ZFS buffers - probe for a ZFS dataset, and if one is found, attempt to boot from it. usr.sbin/bhyveload/bhyveload.c - allow multiple invocations of the '-d' option to specify multiple disks e.g. a raidz set. Up to 32 disks are supported. Tested with various combinations of GPT, MBR, single and multiple disks, RAID-Z, mirrors. Reviewed by: neel Discussed with: avg Tested by: Michael Dexter and others MFC after: 3 weeks --- sys/boot/userboot/Makefile | 2 +- sys/boot/userboot/userboot/Makefile | 9 ++- sys/boot/userboot/userboot/conf.c | 10 +++ sys/boot/userboot/userboot/devicename.c | 24 ++++++- sys/boot/userboot/userboot/main.c | 89 ++++++++++++++++++++++--- sys/boot/userboot/zfs/Makefile | 18 +++++ usr.sbin/bhyveload/bhyveload.c | 46 +++++++++---- 7 files changed, 169 insertions(+), 29 deletions(-) create mode 100644 sys/boot/userboot/zfs/Makefile diff --git a/sys/boot/userboot/Makefile b/sys/boot/userboot/Makefile index f15c9054c417..ebacf6454773 100644 --- a/sys/boot/userboot/Makefile +++ b/sys/boot/userboot/Makefile @@ -2,7 +2,7 @@ .include -SUBDIR= ficl libstand test userboot +SUBDIR= ficl libstand test zfs userboot .include diff --git a/sys/boot/userboot/userboot/Makefile b/sys/boot/userboot/userboot/Makefile index 076b4b27bc67..b3bba107f5ba 100644 --- a/sys/boot/userboot/userboot/Makefile +++ b/sys/boot/userboot/userboot/Makefile @@ -51,12 
+51,17 @@ LIBFICL= ${.OBJDIR}/../ficl/libficl.a LIBSTAND= ${.OBJDIR}/../libstand/libstand.a .endif +.if ${MK_ZFS} != "no" +CFLAGS+= -DUSERBOOT_ZFS_SUPPORT +LIBZFS= ${.OBJDIR}/../zfs/libzfsboot.a +.endif + # Always add MI sources .PATH: ${.CURDIR}/../../common .include "${.CURDIR}/../../common/Makefile.inc" CFLAGS+= -I${.CURDIR}/../../common CFLAGS+= -I. -DPADD= ${LIBFICL} ${LIBSTAND} -LDADD= ${LIBFICL} ${LIBSTAND} +DPADD= ${LIBFICL} ${LIBZFS} ${LIBSTAND} +LDADD= ${LIBFICL} ${LIBZFS} ${LIBSTAND} .include diff --git a/sys/boot/userboot/userboot/conf.c b/sys/boot/userboot/userboot/conf.c index 2a98434ec23d..5eac87da9869 100644 --- a/sys/boot/userboot/userboot/conf.c +++ b/sys/boot/userboot/userboot/conf.c @@ -38,6 +38,10 @@ __FBSDID("$FreeBSD$"); #include "libuserboot.h" +#if defined(USERBOOT_ZFS_SUPPORT) +#include "../zfs/libzfs.h" +#endif + /* * We could use linker sets for some or all of these, but * then we would have to control what ended up linked into @@ -51,6 +55,9 @@ __FBSDID("$FreeBSD$"); struct devsw *devsw[] = { &host_dev, &userboot_disk, +#if defined(USERBOOT_ZFS_SUPPORT) + &zfs_dev, +#endif NULL }; @@ -59,6 +66,9 @@ struct fs_ops *file_system[] = { &ufs_fsops, &cd9660_fsops, &gzipfs_fsops, +#if defined(USERBOOT_ZFS_SUPPORT) + &zfs_fsops, +#endif NULL }; diff --git a/sys/boot/userboot/userboot/devicename.c b/sys/boot/userboot/userboot/devicename.c index d54d0230d7b9..8569ed4c60e4 100644 --- a/sys/boot/userboot/userboot/devicename.c +++ b/sys/boot/userboot/userboot/devicename.c @@ -34,6 +34,10 @@ __FBSDID("$FreeBSD$"); #include "disk.h" #include "libuserboot.h" +#if defined(USERBOOT_ZFS_SUPPORT) +#include "../zfs/libzfs.h" +#endif + static int userboot_parsedev(struct disk_devdesc **dev, const char *devspec, const char **path); /* @@ -119,7 +123,6 @@ userboot_parsedev(struct disk_devdesc **dev, const char *devspec, const char **p case DEVT_CD: case DEVT_NET: - case DEVT_ZFS: unit = 0; if (*np && (*np != ':')) { @@ -141,6 +144,16 @@ userboot_parsedev(struct 
disk_devdesc **dev, const char *devspec, const char **p *path = (*cp == 0) ? cp : cp + 1; break; + case DEVT_ZFS: +#if defined(USERBOOT_ZFS_SUPPORT) + err = zfs_parsedev((struct zfs_devdesc *)idev, np, path); + if (err != 0) + goto fail; + break; +#else + /* FALLTHROUGH */ +#endif + default: err = EINVAL; goto fail; @@ -179,9 +192,16 @@ userboot_fmtdev(void *vdev) return (disk_fmtdev(vdev)); case DEVT_NET: - case DEVT_ZFS: sprintf(buf, "%s%d:", dev->d_dev->dv_name, dev->d_unit); break; + + case DEVT_ZFS: +#if defined(USERBOOT_ZFS_SUPPORT) + return (zfs_fmtdev(vdev)); +#else + sprintf(buf, "%s%d:", dev->d_dev->dv_name, dev->d_unit); +#endif + break; } return(buf); } diff --git a/sys/boot/userboot/userboot/main.c b/sys/boot/userboot/userboot/main.c index 39f6012e16bc..c9353ab7a045 100644 --- a/sys/boot/userboot/userboot/main.c +++ b/sys/boot/userboot/userboot/main.c @@ -36,8 +36,17 @@ __FBSDID("$FreeBSD$"); #include "disk.h" #include "libuserboot.h" +#if defined(USERBOOT_ZFS_SUPPORT) +#include "../zfs/libzfs.h" + +static void userboot_zfs_probe(void); +static int userboot_zfs_found; +#endif + #define USERBOOT_VERSION USERBOOT_VERSION_3 +#define MALLOCSZ (10*1024*1024) + struct loader_callbacks *callbacks; void *callbacks_arg; @@ -69,7 +78,7 @@ exit(int v) void loader_main(struct loader_callbacks *cb, void *arg, int version, int ndisks) { - static char malloc[1024*1024]; + static char mallocbuf[MALLOCSZ]; const char *var; int i; @@ -82,23 +91,15 @@ loader_main(struct loader_callbacks *cb, void *arg, int version, int ndisks) /* * initialise the heap as early as possible. Once this is done, - * alloc() is usable. The stack is buried inside us, so this is - * safe. + * alloc() is usable. */ - setheap((void *)malloc, (void *)(malloc + 1024*1024)); + setheap((void *)mallocbuf, (void *)(mallocbuf + sizeof(mallocbuf))); /* * Hook up the console */ cons_probe(); - /* - * March through the device switch probing for things. 
- */ - for (i = 0; devsw[i] != NULL; i++) - if (devsw[i]->dv_init != NULL) - (devsw[i]->dv_init)(); - printf("\n"); printf("%s, Revision %s\n", bootprog_name, bootprog_rev); printf("(%s, %s)\n", bootprog_maker, bootprog_date); @@ -124,6 +125,16 @@ loader_main(struct loader_callbacks *cb, void *arg, int version, int ndisks) archsw.arch_copyin = userboot_copyin; archsw.arch_copyout = userboot_copyout; archsw.arch_readin = userboot_readin; +#if defined(USERBOOT_ZFS_SUPPORT) + archsw.arch_zfs_probe = userboot_zfs_probe; +#endif + + /* + * March through the device switch probing for things. + */ + for (i = 0; devsw[i] != NULL; i++) + if (devsw[i]->dv_init != NULL) + (devsw[i]->dv_init)(); extract_currdev(); @@ -146,6 +157,19 @@ extract_currdev(void) //bzero(&dev, sizeof(dev)); +#if defined(USERBOOT_ZFS_SUPPORT) + if (userboot_zfs_found) { + struct zfs_devdesc zdev; + + /* Leave the pool/root guid's unassigned */ + bzero(&zdev, sizeof(zdev)); + zdev.d_dev = &zfs_dev; + zdev.d_type = zdev.d_dev->dv_type; + + dev = *(struct disk_devdesc *)&zdev; /* NOTE(review): copies sizeof(struct disk_devdesc) bytes out of zdev; assumes struct zfs_devdesc is at least that large - confirm */ + } else +#endif + if (userboot_disk_maxunit > 0) { dev.d_dev = &userboot_disk; dev.d_type = dev.d_dev->dv_type; @@ -172,6 +196,49 @@ extract_currdev(void) env_noset, env_nounset); } +#if defined(USERBOOT_ZFS_SUPPORT) +static void +userboot_zfs_probe(void) +{ + char devname[32]; + uint64_t pool_guid; + int unit; + + /* + * Open all the disks we can find and see if we can reconstruct + * ZFS pools from them. Record if any were found. 
+ */ + for (unit = 0; unit < userboot_disk_maxunit; unit++) { + sprintf(devname, "disk%d:", unit); + pool_guid = 0; + zfs_probe_dev(devname, &pool_guid); /* a non-zero pool_guid afterwards is treated as "pool found" */ + if (pool_guid != 0) + userboot_zfs_found = 1; + } +} + +COMMAND_SET(lszfs, "lszfs", "list child datasets of a zfs dataset", + command_lszfs); + +static int +command_lszfs(int argc, char *argv[]) +{ + int err; + + if (argc != 2) { /* exactly one argument is required; argv[1] is handed to zfs_list() */ + command_errmsg = "a single dataset must be supplied"; + return (CMD_ERROR); + } + + err = zfs_list(argv[1]); + if (err != 0) { + command_errmsg = strerror(err); /* the zfs_list() result is reported as an errno value */ + return (CMD_ERROR); + } + return (CMD_OK); +} +#endif /* USERBOOT_ZFS_SUPPORT */ + COMMAND_SET(quit, "quit", "exit the loader", command_quit); static int diff --git a/sys/boot/userboot/zfs/Makefile b/sys/boot/userboot/zfs/Makefile new file mode 100644 index 000000000000..8fe315b38ea8 --- /dev/null +++ b/sys/boot/userboot/zfs/Makefile @@ -0,0 +1,18 @@ +# $FreeBSD$ + +S= ${.CURDIR}/../../zfs + +.PATH: ${S} +LIB= zfsboot +INTERNALLIB= + +SRCS+= zfs.c + +CFLAGS+= -I${.CURDIR}/../../common -I${.CURDIR}/../../.. -I. 
+CFLAGS+= -I${.CURDIR}/../../../../lib/libstand +CFLAGS+= -I${.CURDIR}/../../../cddl/boot/zfs + +CFLAGS+= -ffreestanding -fPIC +CFLAGS+= -Wformat -Wall + +.include diff --git a/usr.sbin/bhyveload/bhyveload.c b/usr.sbin/bhyveload/bhyveload.c index 701e9c3e71cb..c1a54326360b 100644 --- a/usr.sbin/bhyveload/bhyveload.c +++ b/usr.sbin/bhyveload/bhyveload.c @@ -88,9 +88,12 @@ __FBSDID("$FreeBSD$"); #define GB (1024 * 1024 * 1024UL) #define BSP 0 +#define NDISKS 32 + static char *host_base; static struct termios term, oldterm; -static int disk_fd = -1; +static int disk_fd[NDISKS]; +static int ndisks; static int consin_fd, consout_fd; static char *vmname, *progname; @@ -287,9 +290,9 @@ cb_diskread(void *arg, int unit, uint64_t from, void *to, size_t size, { ssize_t n; - if (unit != 0 || disk_fd == -1) + if (unit < 0 || unit >= ndisks ) return (EIO); - n = pread(disk_fd, to, size, from); + n = pread(disk_fd[unit], to, size, from); if (n < 0) return (errno); *resid = size - n; @@ -301,7 +304,7 @@ cb_diskioctl(void *arg, int unit, u_long cmd, void *data) { struct stat sb; - if (unit != 0 || disk_fd == -1) + if (unit < 0 || unit >= ndisks) return (EBADF); switch (cmd) { @@ -309,7 +312,7 @@ cb_diskioctl(void *arg, int unit, u_long cmd, void *data) *(u_int *)data = 512; break; case DIOCGMEDIASIZE: - if (fstat(disk_fd, &sb) == 0) + if (fstat(disk_fd[unit], &sb) == 0) *(off_t *)data = sb.st_size; else return (ENOTTY); @@ -601,6 +604,26 @@ altcons_open(char *path) return (err); } +static int +disk_open(char *path) +{ + int err, fd; + + if (ndisks >= NDISKS) /* disk_fd[] has NDISKS slots; index NDISKS would overflow */ + return (ERANGE); + + err = 0; + fd = open(path, O_RDONLY); + + if (fd >= 0) { /* open() fails with -1; descriptor 0 is valid */ + disk_fd[ndisks] = fd; + ndisks++; + } else + err = errno; + + return (err); +} + static void usage(void) { @@ -620,12 +643,10 @@ main(int argc, char** argv) void (*func)(struct loader_callbacks *, void *, int, int); uint64_t mem_size; int opt, error; - char *disk_image; progname = basename(argv[0]); mem_size = 256 * MB; - disk_image = NULL; 
consin_fd = STDIN_FILENO; consout_fd = STDOUT_FILENO; @@ -637,8 +658,11 @@ main(int argc, char** argv) if (error != 0) errx(EX_USAGE, "Could not open '%s'", optarg); break; + case 'd': - disk_image = optarg; + error = disk_open(optarg); + if (error != 0) + errx(EX_USAGE, "Could not open '%s'", optarg); break; case 'e': @@ -704,12 +728,8 @@ main(int argc, char** argv) return (1); } - if (disk_image) { - disk_fd = open(disk_image, O_RDONLY); - } - addenv("smbios.bios.vendor=BHYVE"); addenv("boot_serial=1"); - func(&cb, NULL, USERBOOT_VERSION_3, disk_fd >= 0); + func(&cb, NULL, USERBOOT_VERSION_3, ndisks); }