From 8bb77249dbbb0a9c358a5ddcdff4e454e26a235d Mon Sep 17 00:00:00 2001 From: Hans Petter Selasky Date: Fri, 22 Oct 2010 20:13:45 +0000 Subject: [PATCH 01/65] Add possibility to generate devctl notifications regardless of UGEN presence. Submitted by: Nick Hibma Approved by: thompsa (mentor) --- sys/dev/usb/usb_device.c | 61 +++++++++++++++++++++++++++++---------- sys/dev/usb/usb_freebsd.h | 1 + 2 files changed, 47 insertions(+), 15 deletions(-) diff --git a/sys/dev/usb/usb_device.c b/sys/dev/usb/usb_device.c index b68092d0e53f..8887fc16c960 100644 --- a/sys/dev/usb/usb_device.c +++ b/sys/dev/usb/usb_device.c @@ -98,8 +98,10 @@ static void usb_suspend_resume_sub(struct usb_device *, device_t, static void usbd_clear_stall_proc(struct usb_proc_msg *_pm); static usb_error_t usb_config_parse(struct usb_device *, uint8_t, uint8_t); static void usbd_set_device_strings(struct usb_device *); -#if USB_HAVE_UGEN +#if USB_HAVE_DEVCTL static void usb_notify_addq(const char *type, struct usb_device *); +#endif +#if USB_HAVE_UGEN static void usb_fifo_free_wrap(struct usb_device *, uint8_t, uint8_t); static struct cdev *usb_make_dev(struct usb_device *, int, int); static void usb_cdev_create(struct usb_device *); @@ -1858,7 +1860,9 @@ usb_alloc_device(device_t parent_dev, struct usb_bus *bus, printf("%s: <%s> at %s\n", udev->ugen_name, usb_get_manufacturer(udev), device_get_nameunit(udev->bus->bdev)); +#endif +#if USB_HAVE_DEVCTL usb_notify_addq("ATTACH", udev); #endif done: @@ -2004,9 +2008,11 @@ usb_free_device(struct usb_device *udev, uint8_t flag) bus = udev->bus; usb_set_device_state(udev, USB_STATE_DETACHED); -#if USB_HAVE_UGEN +#if USB_HAVE_DEVCTL usb_notify_addq("DETACH", udev); +#endif +#if USB_HAVE_UGEN printf("%s: <%s> at %s (disconnected)\n", udev->ugen_name, usb_get_manufacturer(udev), device_get_nameunit(bus->bdev)); @@ -2373,7 +2379,7 @@ usbd_get_device_index(struct usb_device *udev) return (udev->device_index); } -#if USB_HAVE_UGEN +#if USB_HAVE_DEVCTL 
/*------------------------------------------------------------------------* * usb_notify_addq * @@ -2409,7 +2415,9 @@ usb_notify_addq_compat(const char *type, struct usb_device *udev) /* String it all together. */ snprintf(data, buf_size, "%s" +#if USB_HAVE_UGEN "%s " +#endif "vendor=0x%04x " "product=0x%04x " "devclass=0x%02x " @@ -2418,20 +2426,27 @@ usb_notify_addq_compat(const char *type, struct usb_device *udev) "release=0x%04x " "at " "port=%u " - "on " - "%s\n", +#if USB_HAVE_UGEN + "on %s\n" +#endif + "", ntype, +#if USB_HAVE_UGEN udev->ugen_name, +#endif UGETW(udev->ddesc.idVendor), UGETW(udev->ddesc.idProduct), udev->ddesc.bDeviceClass, udev->ddesc.bDeviceSubClass, usb_get_serial(udev), UGETW(udev->ddesc.bcdDevice), - udev->port_no, - udev->parent_hub != NULL ? + udev->port_no +#if USB_HAVE_UGEN + , udev->parent_hub != NULL ? udev->parent_hub->ugen_name : - device_get_nameunit(device_get_parent(udev->bus->bdev))); + device_get_nameunit(device_get_parent(udev->bus->bdev)) +#endif + ); devctl_queue_data(data); } @@ -2451,7 +2466,9 @@ usb_notify_addq(const char *type, struct usb_device *udev) /* announce the device */ sb = sbuf_new_auto(); sbuf_printf(sb, - "cdev=%s " +#if USB_HAVE_UGEN + "ugen=%s " +#endif "vendor=0x%04x " "product=0x%04x " "devclass=0x%02x " @@ -2460,8 +2477,13 @@ usb_notify_addq(const char *type, struct usb_device *udev) "release=0x%04x " "mode=%s " "port=%u " - "parent=%s\n", +#if USB_HAVE_UGEN + "parent=%s\n" +#endif + "", +#if USB_HAVE_UGEN udev->ugen_name, +#endif UGETW(udev->ddesc.idVendor), UGETW(udev->ddesc.idProduct), udev->ddesc.bDeviceClass, @@ -2469,10 +2491,13 @@ usb_notify_addq(const char *type, struct usb_device *udev) usb_get_serial(udev), UGETW(udev->ddesc.bcdDevice), (udev->flags.usb_mode == USB_MODE_HOST) ? "host" : "device", - udev->port_no, - udev->parent_hub != NULL ? 
- udev->parent_hub->ugen_name : - device_get_nameunit(device_get_parent(udev->bus->bdev))); + udev->port_no +#if USB_HAVE_UGEN + , udev->parent_hub != NULL ? + udev->parent_hub->ugen_name : + device_get_nameunit(device_get_parent(udev->bus->bdev)) +#endif + ); sbuf_finish(sb); devctl_notify("USB", "DEVICE", type, sbuf_data(sb)); sbuf_delete(sb); @@ -2487,7 +2512,9 @@ usb_notify_addq(const char *type, struct usb_device *udev) sb = sbuf_new_auto(); sbuf_printf(sb, - "cdev=%s " +#if USB_HAVE_UGEN + "ugen=%s " +#endif "vendor=0x%04x " "product=0x%04x " "devclass=0x%02x " @@ -2500,7 +2527,9 @@ usb_notify_addq(const char *type, struct usb_device *udev) "intclass=0x%02x " "intsubclass=0x%02x " "intprotocol=0x%02x\n", +#if USB_HAVE_UGEN udev->ugen_name, +#endif UGETW(udev->ddesc.idVendor), UGETW(udev->ddesc.idProduct), udev->ddesc.bDeviceClass, @@ -2518,7 +2547,9 @@ usb_notify_addq(const char *type, struct usb_device *udev) sbuf_delete(sb); } } +#endif +#if USB_HAVE_UGEN /*------------------------------------------------------------------------* * usb_fifo_free_wrap * diff --git a/sys/dev/usb/usb_freebsd.h b/sys/dev/usb/usb_freebsd.h index 8a008cd47dd5..efc27f20990d 100644 --- a/sys/dev/usb/usb_freebsd.h +++ b/sys/dev/usb/usb_freebsd.h @@ -33,6 +33,7 @@ /* Default USB configuration */ #define USB_HAVE_UGEN 1 +#define USB_HAVE_DEVCTL 1 #define USB_HAVE_BUSDMA 1 #define USB_HAVE_COMPAT_LINUX 1 #define USB_HAVE_USER_IO 1 From 91027b4ef042a9fa41ead8501c87bbfc539ad262 Mon Sep 17 00:00:00 2001 From: Rick Macklem Date: Fri, 22 Oct 2010 21:38:56 +0000 Subject: [PATCH 02/65] Modify the file handle hash function in the experimental NFS server so that it will work better for non-UFS file systems. The new function simply sums the bytes of the fh_fid field of fhandle_t. 
MFC after: 10 days --- sys/fs/nfs/nfs_var.h | 1 + sys/fs/nfs/nfsdport.h | 2 +- sys/fs/nfsserver/nfs_nfsdport.c | 15 +++++++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h index d46983fa97ec..03b7a0a934ae 100644 --- a/sys/fs/nfs/nfs_var.h +++ b/sys/fs/nfs/nfs_var.h @@ -576,6 +576,7 @@ void nfsvno_unlockvfs(mount_t); int nfsvno_lockvfs(mount_t); int nfsrv_v4rootexport(void *, struct ucred *, NFSPROC_T *); int nfsvno_testexp(struct nfsrv_descript *, struct nfsexstuff *); +int nfsrv_hashfh(fhandle_t *); /* nfs_commonkrpc.c */ int newnfs_nmcancelreqs(struct nfsmount *); diff --git a/sys/fs/nfs/nfsdport.h b/sys/fs/nfs/nfsdport.h index fdba91699873..4888866d6e17 100644 --- a/sys/fs/nfs/nfsdport.h +++ b/sys/fs/nfs/nfsdport.h @@ -73,7 +73,7 @@ struct nfsexstuff { bcmp(&(f1)->fh_fid, &(f2)->fh_fid, sizeof(struct fid)) == 0) #define NFSLOCKHASH(f) \ - (&nfslockhash[(*((u_int32_t *)((f)->fh_fid.fid_data))) % NFSLOCKHASHSIZE]) + (&nfslockhash[nfsrv_hashfh(f) % NFSLOCKHASHSIZE]) #define NFSFPVNODE(f) ((struct vnode *)((f)->f_data)) #define NFSFPCRED(f) ((f)->f_cred) diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c index 0d35d1d04b0b..b49442fd89ca 100644 --- a/sys/fs/nfsserver/nfs_nfsdport.c +++ b/sys/fs/nfsserver/nfs_nfsdport.c @@ -3087,6 +3087,21 @@ nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp) return (1); } +/* + * Calculate a hash value for the fid in a file handle. 
+ */ +int +nfsrv_hashfh(fhandle_t *fhp) +{ + int hashval = 0, i; + uint8_t *cp; + + cp = (uint8_t *)&fhp->fh_fid; + for (i = 0; i < sizeof(struct fid); i++) + hashval += *cp++; + return (hashval); +} + extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *); /* From 1d0214411eb46062ffae9705438aaddf3ad44cf5 Mon Sep 17 00:00:00 2001 From: Pawel Jakub Dawidek Date: Fri, 22 Oct 2010 22:13:11 +0000 Subject: [PATCH 03/65] Move sc_akeyctx and sc_ivctx initialization to the g_eli_mkey_propagate() function which eliminates code duplication and will ensure proper order of operation. --- sys/geom/eli/g_eli.c | 24 ------------------------ sys/geom/eli/g_eli_ctl.c | 20 ++------------------ sys/geom/eli/g_eli_key.c | 25 +++++++++++++++++++++++++ 3 files changed, 27 insertions(+), 42 deletions(-) diff --git a/sys/geom/eli/g_eli.c b/sys/geom/eli/g_eli.c index 3b9ffdd81b76..fd04ee2648d8 100644 --- a/sys/geom/eli/g_eli.c +++ b/sys/geom/eli/g_eli.c @@ -817,30 +817,6 @@ g_eli_create(struct gctl_req *req, struct g_class *mp, struct g_provider *bpp, */ g_eli_mkey_propagate(sc, mkey); sc->sc_ekeylen = md->md_keylen; - if (sc->sc_flags & G_ELI_FLAG_AUTH) { - /* - * Precalculate SHA256 for HMAC key generation. - * This is expensive operation and we can do it only once now or - * for every access to sector, so now will be much better. - */ - SHA256_Init(&sc->sc_akeyctx); - SHA256_Update(&sc->sc_akeyctx, sc->sc_akey, - sizeof(sc->sc_akey)); - } - /* - * Precalculate SHA256 for IV generation. - * This is expensive operation and we can do it only once now or for - * every access to sector, so now will be much better. 
- */ - switch (sc->sc_ealgo) { - case CRYPTO_AES_XTS: - break; - default: - SHA256_Init(&sc->sc_ivctx); - SHA256_Update(&sc->sc_ivctx, sc->sc_ivkey, - sizeof(sc->sc_ivkey)); - break; - } LIST_INIT(&sc->sc_workers); diff --git a/sys/geom/eli/g_eli_ctl.c b/sys/geom/eli/g_eli_ctl.c index 7147b270df84..f1dbfbd6d46b 100644 --- a/sys/geom/eli/g_eli_ctl.c +++ b/sys/geom/eli/g_eli_ctl.c @@ -882,26 +882,10 @@ g_eli_ctl_resume(struct gctl_req *req, struct g_class *mp) mtx_lock(&sc->sc_queue_mtx); /* Restore sc_mkey, sc_ekeys, sc_akey and sc_ivkey. */ g_eli_mkey_propagate(sc, mkey); - bzero(mkey, sizeof(mkey)); - bzero(&md, sizeof(md)); - /* Restore sc_akeyctx. */ - if (sc->sc_flags & G_ELI_FLAG_AUTH) { - SHA256_Init(&sc->sc_akeyctx); - SHA256_Update(&sc->sc_akeyctx, sc->sc_akey, - sizeof(sc->sc_akey)); - } - /* Restore sc_ivctx. */ - switch (sc->sc_ealgo) { - case CRYPTO_AES_XTS: - break; - default: - SHA256_Init(&sc->sc_ivctx); - SHA256_Update(&sc->sc_ivctx, sc->sc_ivkey, - sizeof(sc->sc_ivkey)); - break; - } sc->sc_flags &= ~G_ELI_FLAG_SUSPEND; mtx_unlock(&sc->sc_queue_mtx); + bzero(mkey, sizeof(mkey)); + bzero(&md, sizeof(md)); G_ELI_DEBUG(1, "Resumed %s.", pp->name); wakeup(sc); } diff --git a/sys/geom/eli/g_eli_key.c b/sys/geom/eli/g_eli_key.c index bb060e46f860..37b2ad3b6273 100644 --- a/sys/geom/eli/g_eli_key.c +++ b/sys/geom/eli/g_eli_key.c @@ -264,5 +264,30 @@ g_eli_mkey_propagate(struct g_eli_softc *sc, const unsigned char *mkey) /* Generate all encryption keys. */ g_eli_ekeys_generate(sc); } + + if (sc->sc_flags & G_ELI_FLAG_AUTH) { + /* + * Precalculate SHA256 for HMAC key generation. + * This is expensive operation and we can do it only once now or + * for every access to sector, so now will be much better. + */ + SHA256_Init(&sc->sc_akeyctx); + SHA256_Update(&sc->sc_akeyctx, sc->sc_akey, + sizeof(sc->sc_akey)); + } + /* + * Precalculate SHA256 for IV generation. 
+ * This is expensive operation and we can do it only once now or for + * every access to sector, so now will be much better. + */ + switch (sc->sc_ealgo) { + case CRYPTO_AES_XTS: + break; + default: + SHA256_Init(&sc->sc_ivctx); + SHA256_Update(&sc->sc_ivctx, sc->sc_ivkey, + sizeof(sc->sc_ivkey)); + break; + } } #endif From 4f294e1289617467137537616bdefee098401c54 Mon Sep 17 00:00:00 2001 From: Pawel Jakub Dawidek Date: Fri, 22 Oct 2010 22:44:09 +0000 Subject: [PATCH 04/65] Encryption keys array might be NULL if device is suspended. Check for this, so we don't panic when we detach suspended device. --- sys/geom/eli/g_eli.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sys/geom/eli/g_eli.c b/sys/geom/eli/g_eli.c index fd04ee2648d8..08219dcd7c32 100644 --- a/sys/geom/eli/g_eli.c +++ b/sys/geom/eli/g_eli.c @@ -952,9 +952,12 @@ g_eli_destroy(struct g_eli_softc *sc, boolean_t force) } mtx_destroy(&sc->sc_queue_mtx); gp->softc = NULL; - bzero(sc->sc_ekeys, - sc->sc_nekeys * (sizeof(uint8_t *) + G_ELI_DATAKEYLEN)); - free(sc->sc_ekeys, M_ELI); + if (sc->sc_ekeys != NULL) { + /* The sc_ekeys field can be NULL is device is suspended. 
*/ + bzero(sc->sc_ekeys, + sc->sc_nekeys * (sizeof(uint8_t *) + G_ELI_DATAKEYLEN)); + free(sc->sc_ekeys, M_ELI); + } bzero(sc, sizeof(*sc)); free(sc, M_ELI); From d8d61ef8fc2428a1879623dcd4d9e2132ef9a73b Mon Sep 17 00:00:00 2001 From: Pawel Jakub Dawidek Date: Fri, 22 Oct 2010 22:45:26 +0000 Subject: [PATCH 05/65] Add State tag, so 'geli status' will report active/suspended status, eg: # geli status Name Status Components da0.eli SUSPENDED da0 da1.eli ACTIVE da1 --- sys/geom/eli/g_eli.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sys/geom/eli/g_eli.c b/sys/geom/eli/g_eli.c index 08219dcd7c32..deffeb717a82 100644 --- a/sys/geom/eli/g_eli.c +++ b/sys/geom/eli/g_eli.c @@ -1247,6 +1247,8 @@ g_eli_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, sc->sc_ekeylen); sbuf_printf(sb, "%s%s\n", indent, g_eli_algo2str(sc->sc_ealgo)); + sbuf_printf(sb, "%s%s\n", indent, + (sc->sc_flags & G_ELI_FLAG_SUSPEND) ? "SUSPENDED" : "ACTIVE"); } static void From 2f2d7830b5e5df7a0502cb38169f2f0caf7814f9 Mon Sep 17 00:00:00 2001 From: Pawel Jakub Dawidek Date: Fri, 22 Oct 2010 22:54:26 +0000 Subject: [PATCH 06/65] Close a race between checking if device is already suspended and suspending it. 
--- sys/geom/eli/g_eli_ctl.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sys/geom/eli/g_eli_ctl.c b/sys/geom/eli/g_eli_ctl.c index f1dbfbd6d46b..cad962d4421e 100644 --- a/sys/geom/eli/g_eli_ctl.c +++ b/sys/geom/eli/g_eli_ctl.c @@ -840,10 +840,6 @@ g_eli_ctl_resume(struct gctl_req *req, struct g_class *mp) gctl_error(req, "Provider %s is invalid.", name); return; } - if (!(sc->sc_flags & G_ELI_FLAG_SUSPEND)) { - gctl_error(req, "Provider %s not suspended.", name); - return; - } cp = LIST_FIRST(&sc->sc_geom->consumer); pp = cp->provider; error = g_eli_read_metadata(mp, pp, &md); @@ -880,14 +876,18 @@ g_eli_ctl_resume(struct gctl_req *req, struct g_class *mp) G_ELI_DEBUG(1, "Using Master Key %u for %s.", nkey, pp->name); mtx_lock(&sc->sc_queue_mtx); - /* Restore sc_mkey, sc_ekeys, sc_akey and sc_ivkey. */ - g_eli_mkey_propagate(sc, mkey); - sc->sc_flags &= ~G_ELI_FLAG_SUSPEND; + if (!(sc->sc_flags & G_ELI_FLAG_SUSPEND)) + gctl_error(req, "Device %s is not suspended.", name); + else { + /* Restore sc_mkey, sc_ekeys, sc_akey and sc_ivkey. */ + g_eli_mkey_propagate(sc, mkey); + sc->sc_flags &= ~G_ELI_FLAG_SUSPEND; + G_ELI_DEBUG(1, "Resumed %s.", pp->name); + wakeup(sc); + } mtx_unlock(&sc->sc_queue_mtx); bzero(mkey, sizeof(mkey)); bzero(&md, sizeof(md)); - G_ELI_DEBUG(1, "Resumed %s.", pp->name); - wakeup(sc); } static int From 0d2f5a4eaa3bd51a091273f76950e893710200a6 Mon Sep 17 00:00:00 2001 From: Pawel Jakub Dawidek Date: Fri, 22 Oct 2010 22:58:00 +0000 Subject: [PATCH 07/65] - Improve error messages, so instead of 'Not fully done', the user will get information that device is already suspended or that device is using one-time key and suspend is not supported. - 'geli suspend -a' silently skips devices that use one-time key, this is fine, but because we log which devices were suspended on the console, log also which devices were skipped. 
--- sys/geom/eli/g_eli_ctl.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/sys/geom/eli/g_eli_ctl.c b/sys/geom/eli/g_eli_ctl.c index cad962d4421e..24139187e76d 100644 --- a/sys/geom/eli/g_eli_ctl.c +++ b/sys/geom/eli/g_eli_ctl.c @@ -699,22 +699,28 @@ g_eli_ctl_delkey(struct gctl_req *req, struct g_class *mp) G_ELI_DEBUG(1, "Key %d removed from %s.", nkey, pp->name); } -static int -g_eli_suspend_one(struct g_eli_softc *sc) +static void +g_eli_suspend_one(struct g_eli_softc *sc, struct gctl_req *req) { struct g_eli_worker *wr; g_topology_assert(); - if (sc == NULL) - return (ENOENT); - if (sc->sc_flags & G_ELI_FLAG_ONETIME) - return (EOPNOTSUPP); + KASSERT(sc != NULL, ("NULL sc")); + + if (sc->sc_flags & G_ELI_FLAG_ONETIME) { + gctl_error(req, + "Device %s is using one-time key, suspend not supported.", + sc->sc_name); + return; + } mtx_lock(&sc->sc_queue_mtx); if (sc->sc_flags & G_ELI_FLAG_SUSPEND) { mtx_unlock(&sc->sc_queue_mtx); - return (EALREADY); + gctl_error(req, "Device %s already suspended.", + sc->sc_name); + return; } sc->sc_flags |= G_ELI_FLAG_SUSPEND; wakeup(sc); @@ -742,8 +748,7 @@ g_eli_suspend_one(struct g_eli_softc *sc) bzero(sc->sc_ivkey, sizeof(sc->sc_ivkey)); bzero(&sc->sc_ivctx, sizeof(sc->sc_ivctx)); mtx_unlock(&sc->sc_queue_mtx); - G_ELI_DEBUG(0, "%s has been suspended.", sc->sc_name); - return (0); + G_ELI_DEBUG(0, "Device %s has been suspended.", sc->sc_name); } static void @@ -751,7 +756,6 @@ g_eli_ctl_suspend(struct gctl_req *req, struct g_class *mp) { struct g_eli_softc *sc; int *all, *nargs; - int error; g_topology_assert(); @@ -775,11 +779,13 @@ g_eli_ctl_suspend(struct gctl_req *req, struct g_class *mp) LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { sc = gp->softc; - if (sc->sc_flags & G_ELI_FLAG_ONETIME) + if (sc->sc_flags & G_ELI_FLAG_ONETIME) { + G_ELI_DEBUG(0, + "Device %s is using one-time key, suspend not supported, skipping.", + sc->sc_name); continue; - error = 
g_eli_suspend_one(sc); - if (error != 0) - gctl_error(req, "Not fully done."); + } + g_eli_suspend_one(sc, req); } } else { const char *prov; @@ -799,9 +805,7 @@ g_eli_ctl_suspend(struct gctl_req *req, struct g_class *mp) G_ELI_DEBUG(0, "No such provider: %s.", prov); continue; } - error = g_eli_suspend_one(sc); - if (error != 0) - gctl_error(req, "Not fully done."); + g_eli_suspend_one(sc, req); } } } From 82510b7eca9a22b742e52f731716f8ce178435d8 Mon Sep 17 00:00:00 2001 From: Bernhard Schmidt Date: Sat, 23 Oct 2010 11:26:22 +0000 Subject: [PATCH 08/65] The firmware does pad notifications to an even number of bytes (at least the association notification), the included information though always contains an elem block with an odd number of bytes. We handle the last byte as if it might contain a whole elem block, this of course is not true as one byte is not enough to hold a block, we therefore discard the complete frame. The solution here is to subtract one from the actual notification length, this is also what the Linux driver does. With this change the frame ends exactly where the last elem block ends. This commit also reverts r214160 which is no longer required and now even wrong. 
MFC after: 1 week --- sys/dev/iwi/if_iwi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/dev/iwi/if_iwi.c b/sys/dev/iwi/if_iwi.c index 62b53be5bcd9..de2bfb13cbbd 100644 --- a/sys/dev/iwi/if_iwi.c +++ b/sys/dev/iwi/if_iwi.c @@ -1356,7 +1356,7 @@ iwi_checkforqos(struct ieee80211vap *vap, wme = NULL; while (frm < efrm) { - IEEE80211_VERIFY_LENGTH(efrm - frm, frm[1], break); + IEEE80211_VERIFY_LENGTH(efrm - frm, frm[1], return); switch (*frm) { case IEEE80211_ELEMID_VENDOR: if (iswmeoui(frm)) @@ -1483,7 +1483,7 @@ iwi_notification_intr(struct iwi_softc *sc, struct iwi_notif *notif) IWI_STATE_END(sc, IWI_FW_ASSOCIATING); iwi_checkforqos(vap, (const struct ieee80211_frame *)(assoc+1), - le16toh(notif->len) - sizeof(*assoc)); + le16toh(notif->len) - sizeof(*assoc) - 1); ieee80211_new_state(vap, IEEE80211_S_RUN, -1); break; case IWI_ASSOC_INIT: From d9962dc58875352341750ee4633b786b9524d80f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulrich=20Sp=C3=B6rlein?= Date: Sat, 23 Oct 2010 12:27:39 +0000 Subject: [PATCH 09/65] Remove mention of non-existent -o flag for debugging options. MFC after: 3 days --- contrib/bsnmp/snmpd/bsnmpd.1 | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/contrib/bsnmp/snmpd/bsnmpd.1 b/contrib/bsnmp/snmpd/bsnmpd.1 index 06966f06ae60..b609efd21806 100644 --- a/contrib/bsnmp/snmpd/bsnmpd.1 +++ b/contrib/bsnmp/snmpd/bsnmpd.1 @@ -31,7 +31,7 @@ .\" .\" $Begemot: bsnmp/snmpd/bsnmpd.1,v 1.12 2006/02/27 09:50:03 brandt_h Exp $ .\" -.Dd August 16, 2010 +.Dd October 23, 2010 .Dt BSNMPD 1 .Os .Sh NAME @@ -68,11 +68,9 @@ Use .Ar file as configuration file instead of the standard one. .It Fl D Ar options -Debugging options are specified with a -.Fl o -flag followed by a comma separated string of options. +Debugging options are specified as a comma separated string. The following options are available. 
-.Bl -tag -width ".It Cm trace Ns Cm = Ns Cm level" +.Bl -tag -width "trace=level" .It Cm dump Dump all sent and received PDUs to the terminal. .It Cm events From 0d036d55e7524ba0cb088b6760c0a620a3c86958 Mon Sep 17 00:00:00 2001 From: David Xu Date: Sat, 23 Oct 2010 13:16:39 +0000 Subject: [PATCH 10/65] In thr_exit() and kthread_exit(), only remove thread from hash if it can directly exit, otherwise let exit1() do it. The change should be in r213950, but for unknown reason, it was lost. --- sys/kern/kern_kthread.c | 6 +++++- sys/kern/kern_thr.c | 13 +++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/sys/kern/kern_kthread.c b/sys/kern/kern_kthread.c index 9ab0922b816f..be40c80e9a89 100644 --- a/sys/kern/kern_kthread.c +++ b/sys/kern/kern_kthread.c @@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -315,17 +316,20 @@ kthread_exit(void) p = curthread->td_proc; - tidhash_remove(curthread); /* A module may be waiting for us to exit. */ wakeup(curthread); + rw_wlock(&tidhash_lock); PROC_LOCK(p); if (p->p_numthreads == 1) { PROC_UNLOCK(p); + rw_wunlock(&tidhash_lock); kproc_exit(0); /* NOTREACHED. */ } + LIST_REMOVE(curthread, td_hash); + rw_wunlock(&tidhash_lock); PROC_SLOCK(p); thread_exit(); } diff --git a/sys/kern/kern_thr.c b/sys/kern/kern_thr.c index 3a9c721dddce..75656f03c186 100644 --- a/sys/kern/kern_thr.c +++ b/sys/kern/kern_thr.c @@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -284,23 +285,23 @@ thr_exit(struct thread *td, struct thr_exit_args *uap) kern_umtx_wake(td, uap->state, INT_MAX, 0); } - tidhash_remove(td); - + rw_wlock(&tidhash_lock); PROC_LOCK(p); - tdsigcleanup(td); - PROC_SLOCK(p); - /* * Shutting down last thread in the proc. This will actually * call exit() in the trampoline when it returns. 
*/ if (p->p_numthreads != 1) { + LIST_REMOVE(td, td_hash); + rw_wunlock(&tidhash_lock); + tdsigcleanup(td); + PROC_SLOCK(p); thread_stopped(p); thread_exit(); /* NOTREACHED */ } - PROC_SUNLOCK(p); PROC_UNLOCK(p); + rw_wunlock(&tidhash_lock); return (0); } From 880cb81c5a8add67f4af3245913e3856f14baf1f Mon Sep 17 00:00:00 2001 From: Edward Tomasz Napierala Date: Sat, 23 Oct 2010 14:22:50 +0000 Subject: [PATCH 11/65] Remove workaround for ZFS bug; fix was committed to the //depot/user/pjd/zfs/... branch some time ago. MFC after: two weeks --- sys/kern/subr_acl_nfs4.c | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/sys/kern/subr_acl_nfs4.c b/sys/kern/subr_acl_nfs4.c index 3498ddfb14f1..cf664bf898b0 100644 --- a/sys/kern/subr_acl_nfs4.c +++ b/sys/kern/subr_acl_nfs4.c @@ -393,28 +393,6 @@ acl_nfs4_trivial_from_mode(struct acl *aclp, mode_t mode) group_deny = everyone_allow & ~group_allow; user_allow_first = group_deny & ~user_deny; -#if 1 - /* - * This is a workaround for what looks like a bug in ZFS - trivial - * ACL for mode 0077 should look like this: - * - * owner@:rwxp----------:------:deny - * owner@:------aARWcCos:------:allow - * group@:rwxp--a-R-c--s:------:allow - * everyone@:rwxp--a-R-c--s:------:allow - * - * Instead, ZFS makes it like this: - * - * owner@:rwx-----------:------:deny - * owner@:------aARWcCos:------:allow - * group@:rwxp--a-R-c--s:------:allow - * everyone@:rwxp--a-R-c--s:------:allow - */ - user_allow_first &= ~ACL_APPEND_DATA; - user_deny &= ~ACL_APPEND_DATA; - group_deny &= ~ACL_APPEND_DATA; -#endif - if (user_allow_first != 0) _acl_append(aclp, ACL_USER_OBJ, user_allow_first, ACL_ENTRY_TYPE_ALLOW); if (user_deny != 0) From a959b1f02cfdbb7e7388aaf5dfed8d53507de186 Mon Sep 17 00:00:00 2001 From: Robert Watson Date: Sat, 23 Oct 2010 16:59:39 +0000 Subject: [PATCH 12/65] Add missing DTrace probe invocation to mac_vnode_check_open; the probe was declared, but never used. MFC after: 3 days Sponsored by: Google, Inc. 
--- sys/security/mac/mac_vfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sys/security/mac/mac_vfs.c b/sys/security/mac/mac_vfs.c index dcd29aa23ecc..c4f305bbc9d5 100644 --- a/sys/security/mac/mac_vfs.c +++ b/sys/security/mac/mac_vfs.c @@ -637,6 +637,8 @@ mac_vnode_check_open(struct ucred *cred, struct vnode *vp, accmode_t accmode) ASSERT_VOP_LOCKED(vp, "mac_vnode_check_open"); MAC_POLICY_CHECK(vnode_check_open, cred, vp, vp->v_label, accmode); + MAC_CHECK_PROBE3(vnode_check_open, error, cred, vp, accmode); + return (error); } From 4a85b5e2eaaed628f6d19891764df6a5a70005f1 Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Sat, 23 Oct 2010 20:35:40 +0000 Subject: [PATCH 13/65] Make the IPsec SADB embedded route cache a union to be able to hold both the legacy and IPv6 route destination address. Previously in case of IPv6, there was a memory overwrite due to not enough space for the IPv6 address. PR: kern/122565 MFC After: 2 weeks --- sys/netinet/ip_ipsec.c | 2 +- sys/netinet6/ip6_ipsec.c | 2 +- sys/netipsec/ipsec_output.c | 3 ++- sys/netipsec/key.c | 8 ++++---- sys/netipsec/keydb.h | 8 +++++++- 5 files changed, 15 insertions(+), 8 deletions(-) diff --git a/sys/netinet/ip_ipsec.c b/sys/netinet/ip_ipsec.c index 3465d4b5f5b5..50a6ce44a490 100644 --- a/sys/netinet/ip_ipsec.c +++ b/sys/netinet/ip_ipsec.c @@ -239,7 +239,7 @@ ip_ipsec_mtu(struct mbuf *m, int mtu) if (sp->req != NULL && sp->req->sav != NULL && sp->req->sav->sah != NULL) { - ro = &sp->req->sav->sah->sa_route; + ro = &sp->req->sav->sah->route_cache.sa_route; if (ro->ro_rt && ro->ro_rt->rt_ifp) { mtu = ro->ro_rt->rt_rmx.rmx_mtu ? 
diff --git a/sys/netinet6/ip6_ipsec.c b/sys/netinet6/ip6_ipsec.c index 48d91628c29b..96b09ef8b4c9 100644 --- a/sys/netinet6/ip6_ipsec.c +++ b/sys/netinet6/ip6_ipsec.c @@ -366,7 +366,7 @@ ip6_ipsec_mtu(struct mbuf *m) if (sp->req != NULL && sp->req->sav != NULL && sp->req->sav->sah != NULL) { - ro = &sp->req->sav->sah->sa_route; + ro = &sp->req->sav->sah->route_cache.sa_route; if (ro->ro_rt && ro->ro_rt->rt_ifp) { mtu = ro->ro_rt->rt_rmx.rmx_mtu ? diff --git a/sys/netipsec/ipsec_output.c b/sys/netipsec/ipsec_output.c index 2701796f8c06..0907f4575518 100644 --- a/sys/netipsec/ipsec_output.c +++ b/sys/netipsec/ipsec_output.c @@ -829,7 +829,8 @@ ipsec6_output_tunnel(struct ipsec_output_state *state, struct secpolicy *sp, int } ip6 = mtod(m, struct ip6_hdr *); - state->ro = &isr->sav->sah->sa_route; + state->ro = + (struct route *)&isr->sav->sah->route_cache.sin6_route; state->dst = (struct sockaddr *)&state->ro->ro_dst; dst6 = (struct sockaddr_in6 *)state->dst; if (state->ro->ro_rt diff --git a/sys/netipsec/key.c b/sys/netipsec/key.c index d00489db9077..e57eb4432bbc 100644 --- a/sys/netipsec/key.c +++ b/sys/netipsec/key.c @@ -2758,9 +2758,9 @@ key_delsah(sah) /* remove from tree of SA index */ if (__LIST_CHAINED(sah)) LIST_REMOVE(sah, chain); - if (sah->sa_route.ro_rt) { - RTFREE(sah->sa_route.ro_rt); - sah->sa_route.ro_rt = (struct rtentry *)NULL; + if (sah->route_cache.sa_route.ro_rt) { + RTFREE(sah->route_cache.sa_route.ro_rt); + sah->route_cache.sa_route.ro_rt = (struct rtentry *)NULL; } free(sah, M_IPSEC_SAH); } @@ -7925,7 +7925,7 @@ key_sa_routechange(dst) SAHTREE_LOCK(); LIST_FOREACH(sah, &V_sahtree, chain) { - ro = &sah->sa_route; + ro = &sah->route_cache.sa_route; if (ro->ro_rt && dst->sa_len == ro->ro_dst.sa_len && bcmp(dst, &ro->ro_dst, dst->sa_len) == 0) { RTFREE(ro->ro_rt); diff --git a/sys/netipsec/keydb.h b/sys/netipsec/keydb.h index 07e1f6024e0c..7494f5f4f3f9 100644 --- a/sys/netipsec/keydb.h +++ b/sys/netipsec/keydb.h @@ -85,6 +85,12 @@ struct 
seclifetime { u_int64_t usetime; }; +union sa_route_union { + struct route sa_route; + struct route sin_route; /* Duplicate for consistency. */ + struct route_in6 sin6_route; +}; + /* Security Association Data Base */ struct secashead { LIST_ENTRY(secashead) chain; @@ -100,7 +106,7 @@ struct secashead { /* SA chain */ /* The first of this list is newer SA */ - struct route sa_route; /* route cache */ + union sa_route_union route_cache; }; struct xformsw; From ca4f898699c0df644c89fca19e745f2380577d12 Mon Sep 17 00:00:00 2001 From: Pyun YongHyeon Date: Sat, 23 Oct 2010 21:25:50 +0000 Subject: [PATCH 14/65] Apply the same workaround for SDI flow control used on BCM5906 A1 to BCM5906 A0/A2. This should fix long-standing BCM5906 A2 lockup issues. Data sheet explicitly mentions BCM5906 A0, A1 and A2 use de-pipelined mode on these revisions. Special thanks to Buganini who tried all combinations of experimental patches for more than 10 days. Tested by: Buganini gmail dot com > --- sys/dev/bge/if_bge.c | 13 ++++++++----- sys/dev/bge/if_bgereg.h | 1 + 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/sys/dev/bge/if_bge.c b/sys/dev/bge/if_bge.c index 95de34cb0bb4..92b39f3c0910 100644 --- a/sys/dev/bge/if_bge.c +++ b/sys/dev/bge/if_bge.c @@ -1693,11 +1693,14 @@ bge_blockinit(struct bge_softc *sc) bge_writembx(sc, BGE_MBX_RX_MINI_PROD_LO, 0); } - /* Choose de-pipeline mode for BCM5906 A1. */ - if (sc->bge_asicrev == BGE_ASICREV_BCM5906 && - sc->bge_chiprev == BGE_CHIPID_BCM5906_A1) - CSR_WRITE_4(sc, BGE_ISO_PKT_TX, - (CSR_READ_4(sc, BGE_ISO_PKT_TX) & ~3) | 2); + /* Choose de-pipeline mode for BCM5906 A0, A1 and A2. 
*/ + if (sc->bge_asicrev == BGE_ASICREV_BCM5906) { + if (sc->bge_chiprev == BGE_CHIPID_BCM5906_A0 || + sc->bge_chiprev == BGE_CHIPID_BCM5906_A1 || + sc->bge_chiprev == BGE_CHIPID_BCM5906_A2) + CSR_WRITE_4(sc, BGE_ISO_PKT_TX, + (CSR_READ_4(sc, BGE_ISO_PKT_TX) & ~3) | 2); + } /* * The BD ring replenish thresholds control how often the * hardware fetches new BD's from the producer rings in host diff --git a/sys/dev/bge/if_bgereg.h b/sys/dev/bge/if_bgereg.h index a4f3f834b617..a50bf59280aa 100644 --- a/sys/dev/bge/if_bgereg.h +++ b/sys/dev/bge/if_bgereg.h @@ -306,6 +306,7 @@ #define BGE_CHIPID_BCM5787_A0 0xb000 #define BGE_CHIPID_BCM5787_A1 0xb001 #define BGE_CHIPID_BCM5787_A2 0xb002 +#define BGE_CHIPID_BCM5906_A0 0xc000 #define BGE_CHIPID_BCM5906_A1 0xc001 #define BGE_CHIPID_BCM5906_A2 0xc002 #define BGE_CHIPID_BCM57780_A0 0x57780000 From 377c50f67aa2f0c7c1edd41b74122f41ede03ada Mon Sep 17 00:00:00 2001 From: Rick Macklem Date: Sat, 23 Oct 2010 22:28:29 +0000 Subject: [PATCH 15/65] Modify the experimental NFSv4 server's file handle hash function to use the generic hash32_buf() function. Although adding the bytes seemed sufficient for UFS and ZFS, since most of the bytes are the same for file handles on the same volume, this might not be sufficient for other file systems. Use of a generic function also seems preferable to one specific to NFSv4. 
Suggested by: gleb.kurtsou at gmail.com MFC after: 10 days --- sys/fs/nfs/nfs_var.h | 2 +- sys/fs/nfsserver/nfs_nfsdport.c | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h index 03b7a0a934ae..0fdb61dd4d7f 100644 --- a/sys/fs/nfs/nfs_var.h +++ b/sys/fs/nfs/nfs_var.h @@ -576,7 +576,7 @@ void nfsvno_unlockvfs(mount_t); int nfsvno_lockvfs(mount_t); int nfsrv_v4rootexport(void *, struct ucred *, NFSPROC_T *); int nfsvno_testexp(struct nfsrv_descript *, struct nfsexstuff *); -int nfsrv_hashfh(fhandle_t *); +uint32_t nfsrv_hashfh(fhandle_t *); /* nfs_commonkrpc.c */ int newnfs_nmcancelreqs(struct nfsmount *); diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c index b49442fd89ca..380aa7210367 100644 --- a/sys/fs/nfsserver/nfs_nfsdport.c +++ b/sys/fs/nfsserver/nfs_nfsdport.c @@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$"); */ #include +#include #include #include #include @@ -3090,15 +3091,12 @@ nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp) /* * Calculate a hash value for the fid in a file handle. */ -int +uint32_t nfsrv_hashfh(fhandle_t *fhp) { - int hashval = 0, i; - uint8_t *cp; + uint32_t hashval; - cp = (uint8_t *)&fhp->fh_fid; - for (i = 0; i < sizeof(struct fid); i++) - hashval += *cp++; + hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0); return (hashval); } From a9d0c8490967f55b3776afbdb732e76fedd9af30 Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Sun, 24 Oct 2010 01:05:10 +0000 Subject: [PATCH 16/65] Move variable declarations into the conditional block where they are used, to fix warning if WITH_SSL is not set. 
Submitted by: Sean Bruno MFC after: 1 week --- lib/libfetch/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/libfetch/common.c b/lib/libfetch/common.c index adeeb455e607..ff10d27b419e 100644 --- a/lib/libfetch/common.c +++ b/lib/libfetch/common.c @@ -321,9 +321,9 @@ fetch_connect(const char *host, int port, int af, int verbose) int fetch_ssl(conn_t *conn, int verbose) { +#ifdef WITH_SSL int ret, ssl_err; -#ifdef WITH_SSL /* Init the SSL library and context */ if (!SSL_library_init()){ fprintf(stderr, "SSL library init failed\n"); From 6d6f51376332aeee5c2d77cf2019daf01d84ff16 Mon Sep 17 00:00:00 2001 From: Yoshihiro Takahashi Date: Sun, 24 Oct 2010 02:59:02 +0000 Subject: [PATCH 17/65] MFi386: revision 214210 Avoid using memcpy() for copying 32bit chunks. This shrinks the resulting code a little. --- sys/boot/pc98/boot2/boot2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/boot/pc98/boot2/boot2.c b/sys/boot/pc98/boot2/boot2.c index 54082ad207fa..259d5de93fe3 100644 --- a/sys/boot/pc98/boot2/boot2.c +++ b/sys/boot/pc98/boot2/boot2.c @@ -485,7 +485,7 @@ load(void) return; p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE); bootinfo.bi_symtab = VTOP(p); - memcpy(p, &hdr.ex.a_syms, sizeof(hdr.ex.a_syms)); + *(uint32_t*)p = hdr.ex.a_syms; p += sizeof(hdr.ex.a_syms); if (hdr.ex.a_syms) { if (xfsread(ino, p, hdr.ex.a_syms)) @@ -522,7 +522,7 @@ load(void) if (xfsread(ino, &es, sizeof(es))) return; for (i = 0; i < 2; i++) { - memcpy(p, &es[i].sh_size, sizeof(es[i].sh_size)); + *(Elf32_Word *)p = es[i].sh_size; p += sizeof(es[i].sh_size); fs_off = es[i].sh_offset; if (xfsread(ino, p, es[i].sh_size)) From fcaae21d9280fb262a0f5f3860d236d8e7c1bedd Mon Sep 17 00:00:00 2001 From: Yoshihiro Takahashi Date: Sun, 24 Oct 2010 03:20:54 +0000 Subject: [PATCH 18/65] MFi386: the part of revision 213226. Rewrite the i386 memory probe: - Move the base memory setup into a new basemem_setup() routine. 
MFC after: 1 week --- sys/pc98/pc98/machdep.c | 99 +++++++++++++++++++++++------------------ 1 file changed, 55 insertions(+), 44 deletions(-) diff --git a/sys/pc98/pc98/machdep.c b/sys/pc98/pc98/machdep.c index c7c4c96d9854..32328c5b9c5a 100644 --- a/sys/pc98/pc98/machdep.c +++ b/sys/pc98/pc98/machdep.c @@ -1776,52 +1776,13 @@ sdtossd(sd, ssd) ssd->ssd_gran = sd->sd_gran; } -/* - * Populate the (physmap) array with base/bound pairs describing the - * available physical memory in the system, then test this memory and - * build the phys_avail array describing the actually-available memory. - * - * If we cannot accurately determine the physical memory map, then use - * value from the 0xE801 call, and failing that, the RTC. - * - * Total memory size may be set by the kernel environment variable - * hw.physmem or the compile-time define MAXMEM. - * - * XXX first should be vm_paddr_t. - */ static void -getmemsize(int first) +basemem_setup(void) { - int i, off, physmap_idx, pa_indx, da_indx; - int pg_n; - u_long physmem_tunable; - u_int extmem; - u_int under16; - vm_paddr_t pa, physmap[PHYSMAP_SIZE]; + vm_paddr_t pa; pt_entry_t *pte; - quad_t dcons_addr, dcons_size; + int i; - bzero(physmap, sizeof(physmap)); - - /* XXX - some of EPSON machines can't use PG_N */ - pg_n = PG_N; - if (pc98_machine_type & M_EPSON_PC98) { - switch (epson_machine_id) { -#ifdef WB_CACHE - default: -#endif - case EPSON_PC486_HX: - case EPSON_PC486_HG: - case EPSON_PC486_HA: - pg_n = 0; - break; - } - } - - /* - * Perform "base memory" related probes & setup - */ - under16 = pc98_getmemsize(&basemem, &extmem); if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); @@ -1853,12 +1814,62 @@ getmemsize(int first) pmap_kenter(KERNBASE + pa, pa); /* - * if basemem != 640, map pages r/w into vm86 page table so - * that the bios can scribble on it. 
+ * Map pages between basemem and ISA_HOLE_START, if any, r/w into + * the vm86 page table so that vm86 can scribble on them using + * the vm86 map too. XXX: why 2 ways for this and only 1 way for + * page 0, at least as initialized here? */ pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; +} + +/* + * Populate the (physmap) array with base/bound pairs describing the + * available physical memory in the system, then test this memory and + * build the phys_avail array describing the actually-available memory. + * + * If we cannot accurately determine the physical memory map, then use + * value from the 0xE801 call, and failing that, the RTC. + * + * Total memory size may be set by the kernel environment variable + * hw.physmem or the compile-time define MAXMEM. + * + * XXX first should be vm_paddr_t. + */ +static void +getmemsize(int first) +{ + int off, physmap_idx, pa_indx, da_indx; + u_long physmem_tunable; + vm_paddr_t physmap[PHYSMAP_SIZE]; + pt_entry_t *pte; + quad_t dcons_addr, dcons_size; + int i; + int pg_n; + u_int extmem; + u_int under16; + vm_paddr_t pa; + + bzero(physmap, sizeof(physmap)); + + /* XXX - some of EPSON machines can't use PG_N */ + pg_n = PG_N; + if (pc98_machine_type & M_EPSON_PC98) { + switch (epson_machine_id) { +#ifdef WB_CACHE + default: +#endif + case EPSON_PC486_HX: + case EPSON_PC486_HG: + case EPSON_PC486_HA: + pg_n = 0; + break; + } + } + + under16 = pc98_getmemsize(&basemem, &extmem); + basemem_setup(); physmap[0] = 0; physmap[1] = basemem * 1024; From 5bf52f1fb54e00709d477ee0c808c9e8a2d6cb59 Mon Sep 17 00:00:00 2001 From: "Jayachandran C." Date: Sun, 24 Oct 2010 05:22:07 +0000 Subject: [PATCH 19/65] Fix PIC_RETURN when abicalls are not defined. 
Submitted by: Artem Belevich (artemb at gmail dot com) --- lib/libc/mips/SYS.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/libc/mips/SYS.h b/lib/libc/mips/SYS.h index 7f2eeff5427d..10205b8793a4 100644 --- a/lib/libc/mips/SYS.h +++ b/lib/libc/mips/SYS.h @@ -91,7 +91,7 @@ #else # define PIC_PROLOGUE(x) # define PIC_TAILCALL(l) j _C_LABEL(l) -# define PIC_RETURN() +# define PIC_RETURN() j ra #endif /* __ABICALLS__ */ # define SYSTRAP(x) li v0,SYS_ ## x; syscall; From 6e4d736661e55467284b40925ded8fe453b7b5f8 Mon Sep 17 00:00:00 2001 From: Robert Watson Date: Sun, 24 Oct 2010 09:14:21 +0000 Subject: [PATCH 20/65] Add microbenchmark for create/unlink of a zero-byte file. Sponsored by: Google, Inc. MFC after: 2 weeks --- tools/tools/syscall_timing/syscall_timing.c | 35 +++++++++++++++++++-- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/tools/tools/syscall_timing/syscall_timing.c b/tools/tools/syscall_timing/syscall_timing.c index 0bab9830f70d..2fb2add241fd 100644 --- a/tools/tools/syscall_timing/syscall_timing.c +++ b/tools/tools/syscall_timing/syscall_timing.c @@ -259,6 +259,34 @@ test_socketpair_dgram(uintmax_t num, uintmax_t int_arg, const char *path) return (i); } +uintmax_t +test_create_unlink(uintmax_t num, uintmax_t int_arg, const char *path) +{ + uintmax_t i; + int fd; + + (void)unlink(path); + fd = open(path, O_RDWR | O_CREAT, 0600); + if (fd < 0) + err(-1, "test_create_unlink: create: %s", path); + close(fd); + if (unlink(path) < 0) + err(-1, "test_create_unlink: unlink: %s", path); + benchmark_start(); + for (i = 0; i < num; i++) { + if (alarm_fired) + break; + fd = open(path, O_RDWR | O_CREAT, 0600); + if (fd < 0) + err(-1, "test_create_unlink: create: %s", path); + close(fd); + if (unlink(path) < 0) + err(-1, "test_create_unlink: unlink: %s", path); + } + benchmark_stop(); + return (i); +} + uintmax_t test_open_close(uintmax_t num, uintmax_t int_arg, const char *path) { @@ -292,7 +320,7 @@ test_read(uintmax_t num, 
uintmax_t int_arg, const char *path) fd = open(path, O_RDONLY); if (fd < 0) - err(-1, "test_open_close: %s", path); + err(-1, "test_open_read: %s", path); (void)pread(fd, buf, int_arg, 0); benchmark_start(); @@ -315,7 +343,7 @@ test_open_read_close(uintmax_t num, uintmax_t int_arg, const char *path) fd = open(path, O_RDONLY); if (fd < 0) - err(-1, "test_open_close: %s", path); + err(-1, "test_open_read_close: %s", path); (void)read(fd, buf, int_arg); close(fd); @@ -325,7 +353,7 @@ test_open_read_close(uintmax_t num, uintmax_t int_arg, const char *path) break; fd = open(path, O_RDONLY); if (fd < 0) - err(-1, "test_open_close: %s", path); + err(-1, "test_open_read_close: %s", path); (void)read(fd, buf, int_arg); close(fd); } @@ -587,6 +615,7 @@ static const struct test tests[] = { { "socketpair_dgram", test_socketpair_dgram }, { "socket_tcp", test_socket_stream, .t_int = PF_INET }, { "socket_udp", test_socket_dgram, .t_int = PF_INET }, + { "create_unlink", test_create_unlink, .t_flags = FLAG_PATH }, { "open_close", test_open_close, .t_flags = FLAG_PATH }, { "open_read_close_1", test_open_read_close, .t_flags = FLAG_PATH, .t_int = 1 }, From f6613deb1ff2cc15a603480c6f93523784740431 Mon Sep 17 00:00:00 2001 From: Marius Strobl Date: Sun, 24 Oct 2010 11:37:01 +0000 Subject: [PATCH 21/65] - Take advantage of mii_phy_dev_probe(). - Use mii_phy_add_media() instead of mii_add_media(). I'm not sure how this driver actually managed to work before as mii_add_media() is intended to be used together with mii_anar() while mii_phy_add_media() is intended to be used with mii_phy_setmedia(), however this driver used mii_add_media() along with mii_phy_setmedia().
--- sys/dev/mii/smcphy.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/sys/dev/mii/smcphy.c b/sys/dev/mii/smcphy.c index f356fdb7e94b..f442858eaa55 100644 --- a/sys/dev/mii/smcphy.c +++ b/sys/dev/mii/smcphy.c @@ -76,20 +76,16 @@ static driver_t smcphy_driver = { DRIVER_MODULE(smcphy, miibus, smcphy_driver, smcphy_devclass, 0, 0); +static const struct mii_phydesc smcphys[] = { + MII_PHY_DESC(SMSC, LAN83C183), + MII_PHY_END +}; + static int smcphy_probe(device_t dev) { - struct mii_attach_args *ma; - ma = device_get_ivars(dev); - - if (MII_OUI(ma->mii_id1, ma->mii_id2) != MII_OUI_SMSC || - MII_MODEL(ma->mii_id2) != MII_MODEL_SMSC_LAN83C183) - return (ENXIO); - - device_set_desc(dev, MII_STR_SMSC_LAN83C183); - - return (0); + return (mii_phy_dev_probe(dev, smcphys, BUS_PROBE_DEFAULT)); } static int @@ -111,7 +107,7 @@ smcphy_attach(device_t dev) sc->mii_service = smcphy_service; sc->mii_pdata = mii; - sc->mii_flags |= MIIF_NOISOLATE; + sc->mii_flags |= MIIF_NOISOLATE | MIIF_NOLOOP; if (smcphy_reset(sc) != 0) { device_printf(dev, "reset failed\n"); @@ -122,7 +118,7 @@ smcphy_attach(device_t dev) sc->mii_capabilities = PHY_READ(sc, MII_BMSR) & ma->mii_capmask; device_printf(dev, " "); - mii_add_media(sc); + mii_phy_add_media(sc); printf("\n"); MIIBUS_MEDIAINIT(sc->mii_dev); From 743d2b468a3cce9d13b32457cf79fb258d0ac5c6 Mon Sep 17 00:00:00 2001 From: Marius Strobl Date: Sun, 24 Oct 2010 11:38:25 +0000 Subject: [PATCH 22/65] Take advantage of mii_phy_add_media()/mii_phy_setmedia(). 
--- sys/dev/mii/pnaphy.c | 34 +++++++--------------------------- 1 file changed, 7 insertions(+), 27 deletions(-) diff --git a/sys/dev/mii/pnaphy.c b/sys/dev/mii/pnaphy.c index 69c41d66ee45..159575bdf6a6 100644 --- a/sys/dev/mii/pnaphy.c +++ b/sys/dev/mii/pnaphy.c @@ -102,7 +102,6 @@ pnaphy_attach(device_t dev) struct mii_softc *sc; struct mii_attach_args *ma; struct mii_data *mii; - const char *sep = ""; sc = device_get_softc(dev); ma = device_get_ivars(dev); @@ -116,29 +115,16 @@ pnaphy_attach(device_t dev) sc->mii_service = pnaphy_service; sc->mii_pdata = mii; - sc->mii_flags |= MIIF_NOISOLATE; - -#define ADD(m, c) ifmedia_add(&mii->mii_media, (m), (c), NULL) -#define PRINT(s) printf("%s%s", sep, s); sep = ", " + sc->mii_flags |= MIIF_NOISOLATE | MIIF_NOLOOP | MIIF_IS_HPNA; mii_phy_reset(sc); sc->mii_capabilities = PHY_READ(sc, MII_BMSR) & ma->mii_capmask; device_printf(dev, " "); - if ((sc->mii_capabilities & BMSR_MEDIAMASK) == 0) - printf("no media present"); - else { - ADD(IFM_MAKEWORD(IFM_ETHER, IFM_HPNA_1, 0, sc->mii_inst), 0); - PRINT("HomePNA"); - } - + mii_phy_add_media(sc); printf("\n"); -#undef ADD -#undef PRINT - MIIBUS_MEDIAINIT(sc->mii_dev); - return (0); } @@ -159,17 +145,11 @@ pnaphy_service(struct mii_softc *sc, struct mii_data *mii, int cmd) break; switch (IFM_SUBTYPE(ife->ifm_media)) { - case IFM_AUTO: - case IFM_10_T: - case IFM_100_TX: - case IFM_100_T4: - return (EINVAL); + case IFM_HPNA_1: + mii_phy_setmedia(sc); + break; default: - /* - * BMCR data is stored in the ifmedia entry. - */ - PHY_WRITE(sc, MII_ANAR, mii_anar(ife->ifm_media)); - PHY_WRITE(sc, MII_BMCR, ife->ifm_data); + return (EINVAL); } break; @@ -182,7 +162,7 @@ pnaphy_service(struct mii_softc *sc, struct mii_data *mii, int cmd) /* Update the media status. */ ukphy_status(sc); if (IFM_SUBTYPE(mii->mii_media_active) == IFM_10_T) - mii->mii_media_active = IFM_ETHER|IFM_HPNA_1; + mii->mii_media_active = IFM_ETHER | IFM_HPNA_1; /* Callback if something changed. 
*/ mii_phy_update(sc, cmd); From bcbab52daf8f3fb55c4f6409b6305c68f3efa648 Mon Sep 17 00:00:00 2001 From: Marius Strobl Date: Sun, 24 Oct 2010 12:51:02 +0000 Subject: [PATCH 23/65] - Add IFM_10_2 and IFM_10_5 media via tlphy(4) only in case the respective interface also has such connectors. - In tl_attach() unify three different ways of obtaining the device and vendor IDs and remove the now obsolete tl_dinfo from tl_softc. - Given that tlphy(4) only handles the integrated PHYs of NICs driven by tl(4) make it only probe on the latter. - Switch mlphy(4) and tlphy(4) to use mii_phy_add_media()/mii_phy_setmedia(). - Simplify looking for the respective companion PHY in mlphy(4) and tlphy(4) by ignoring the native one by just comparing the device_t's directly rather than the device name. --- sys/dev/mii/mlphy.c | 41 +++++++++++++--------------- sys/dev/mii/tlphy.c | 63 +++++++++++++++++++++++-------------------- sys/dev/tl/if_tl.c | 24 ++++++++++++----- sys/dev/tl/if_tlreg.h | 1 - 4 files changed, 71 insertions(+), 58 deletions(-) diff --git a/sys/dev/mii/mlphy.c b/sys/dev/mii/mlphy.c index 04e4b80f7129..ccfcd79545f0 100644 --- a/sys/dev/mii/mlphy.c +++ b/sys/dev/mii/mlphy.c @@ -70,6 +70,7 @@ __FBSDID("$FreeBSD$"); struct mlphy_softc { struct mii_softc ml_mii; + device_t ml_dev; int ml_state; int ml_linked; }; @@ -96,6 +97,7 @@ static driver_t mlphy_driver = { DRIVER_MODULE(mlphy, miibus, mlphy_driver, mlphy_devclass, 0, 0); +static struct mii_softc *mlphy_find_other(struct mlphy_softc *); static int mlphy_service(struct mii_softc *, struct mii_data *, int); static void mlphy_reset(struct mii_softc *); static void mlphy_status(struct mii_softc *); @@ -105,10 +107,8 @@ mlphy_probe(dev) device_t dev; { struct mii_attach_args *ma; - device_t parent; ma = device_get_ivars(dev); - parent = device_get_parent(device_get_parent(dev)); /* * Micro Linear PHY reports oui == 0 model == 0 @@ -122,7 +122,8 @@ mlphy_probe(dev) * encountered the 6692 on an Olicom card with a ThunderLAN * 
controller chip. */ - if (strcmp(device_get_name(parent), "tl") != 0) + if (strcmp(device_get_name(device_get_parent(device_get_parent(dev))), + "tl") != 0) return (ENXIO); device_set_desc(dev, "Micro Linear 6692 media interface"); @@ -141,6 +142,7 @@ mlphy_attach(dev) msc = device_get_softc(dev); sc = &msc->ml_mii; + msc->ml_dev = dev; ma = device_get_ivars(dev); sc->mii_dev = device_get_parent(dev); mii = ma->mii_data; @@ -155,14 +157,15 @@ mlphy_attach(dev) #define ADD(m, c) ifmedia_add(&mii->mii_media, (m), (c), NULL) ADD(IFM_MAKEWORD(IFM_ETHER, IFM_100_TX, IFM_LOOP, sc->mii_inst), - BMCR_LOOP|BMCR_S100); + MII_MEDIA_100_TX); mii_phy_reset(sc); sc->mii_capabilities = PHY_READ(sc, MII_BMSR) & ma->mii_capmask; + /* Let the companion PHY (if any) only handle the media we don't. */ ma->mii_capmask = ~sc->mii_capabilities; device_printf(dev, " "); - mii_add_media(sc); + mii_phy_add_media(sc); printf("\n"); #undef ADD MIIBUS_MEDIAINIT(sc->mii_dev); @@ -170,20 +173,21 @@ mlphy_attach(dev) } static struct mii_softc * -mlphy_find_other(device_t mii) +mlphy_find_other(struct mlphy_softc *msc) { device_t *devlist; struct mii_softc *retval; int i, devs; retval = NULL; - if (device_get_children(mii, &devlist, &devs)) + if (device_get_children(msc->ml_mii.mii_dev, &devlist, &devs) != 0) return (NULL); - for (i = 0; i < devs; i++) - if (strcmp(device_get_name(devlist[i]), "mlphy")) { + for (i = 0; i < devs; i++) { + if (devlist[i] != msc->ml_dev) { retval = device_get_softc(devlist[i]); break; } + } free(devlist, M_TEMP); return (retval); } @@ -204,7 +208,7 @@ mlphy_service(xsc, mii, cmd) * See if there's another PHY on this bus with us. * If so, we may need it for 10Mbps modes. 
*/ - other = mlphy_find_other(msc->ml_mii.mii_dev); + other = mlphy_find_other(msc); switch (cmd) { case MII_POLLSTAT: @@ -229,7 +233,7 @@ mlphy_service(xsc, mii, cmd) mii_phy_reset(other); PHY_WRITE(other, MII_BMCR, BMCR_ISO); } - (void) mii_phy_auto(sc); + (void)mii_phy_auto(sc); msc->ml_linked = 0; return (0); case IFM_10_T: @@ -246,8 +250,7 @@ mlphy_service(xsc, mii, cmd) mii_phy_reset(other); PHY_WRITE(other, MII_BMCR, ife->ifm_data); } - PHY_WRITE(sc, MII_ANAR, mii_anar(ife->ifm_media)); - PHY_WRITE(sc, MII_BMCR, ife->ifm_data); + mii_phy_setmedia(sc); msc->ml_state = 0; break; case IFM_100_TX: @@ -262,17 +265,11 @@ mlphy_service(xsc, mii, cmd) mii_phy_reset(other); PHY_WRITE(other, MII_BMCR, BMCR_ISO); } - PHY_WRITE(sc, MII_ANAR, mii_anar(ife->ifm_media)); - PHY_WRITE(sc, MII_BMCR, ife->ifm_data); + mii_phy_setmedia(sc); msc->ml_state = 0; break; - case IFM_100_T4: - /* - * XXX Not supported as a manual setting right now. - */ - return (EINVAL); default: - break; + return (EINVAL); } break; @@ -381,7 +378,7 @@ mlphy_status(sc) struct mii_softc *other = NULL; /* See if there's another PHY on the bus with us. */ - other = mlphy_find_other(msc->ml_mii.mii_dev); + other = mlphy_find_other(msc); if (other == NULL) return; diff --git a/sys/dev/mii/tlphy.c b/sys/dev/mii/tlphy.c index e618611d8b03..d23cfb3973dd 100644 --- a/sys/dev/mii/tlphy.c +++ b/sys/dev/mii/tlphy.c @@ -124,6 +124,9 @@ static int tlphy_probe(device_t dev) { + if (strcmp(device_get_name(device_get_parent(device_get_parent(dev))), + "tl") != 0) + return (ENXIO); return (mii_phy_dev_probe(dev, tlphys, BUS_PROBE_DEFAULT)); } @@ -150,11 +153,17 @@ tlphy_attach(device_t dev) sc->sc_mii.mii_service = tlphy_service; sc->sc_mii.mii_pdata = mii; + /* + * Note that if we're on a device that also supports 100baseTX, + * we are not going to want to use the built-in 10baseT port, + * since there will be another PHY on the MII wired up to the + * UTP connector. 
+ */ capmask = BMSR_DEFCAPMASK; if (mii->mii_instance && device_get_children(sc->sc_mii.mii_dev, &devlist, &devs) == 0) { for (i = 0; i < devs; i++) { - if (strcmp(device_get_name(devlist[i]), "tlphy")) { + if (devlist[i] != dev) { other = device_get_softc(devlist[i]); capmask &= ~other->mii_capabilities; break; @@ -167,38 +176,36 @@ tlphy_attach(device_t dev) mii_phy_reset(&sc->sc_mii); - /* - * Note that if we're on a device that also supports 100baseTX, - * we are not going to want to use the built-in 10baseT port, - * since there will be another PHY on the MII wired up to the - * UTP connector. The parent indicates this to us by specifying - * the TLPHY_MEDIA_NO_10_T bit. - */ sc->sc_mii.mii_capabilities = PHY_READ(&sc->sc_mii, MII_BMSR) & capmask; #define ADD(m, c) ifmedia_add(&mii->mii_media, (m), (c), NULL) - ADD(IFM_MAKEWORD(IFM_ETHER, IFM_NONE, 0, sc->sc_mii.mii_inst), - BMCR_ISO); - - ADD(IFM_MAKEWORD(IFM_ETHER, IFM_10_T, IFM_LOOP, - sc->sc_mii.mii_inst), BMCR_LOOP); + ADD(IFM_MAKEWORD(IFM_ETHER, IFM_10_T, IFM_LOOP, sc->sc_mii.mii_inst), + MII_MEDIA_100_TX); #define PRINT(s) printf("%s%s", sep, s); sep = ", " - device_printf(dev, " "); - ADD(IFM_MAKEWORD(IFM_ETHER, IFM_10_2, 0, sc->sc_mii.mii_inst), 0); - PRINT("10base2/BNC"); - ADD(IFM_MAKEWORD(IFM_ETHER, IFM_10_5, 0, sc->sc_mii.mii_inst), 0); - PRINT("10base5/AUI"); - - if (sc->sc_mii.mii_capabilities & BMSR_MEDIAMASK) { - printf("%s", sep); - mii_add_media(&sc->sc_mii); + if ((sc->sc_mii.mii_flags & (MIIF_MACPRIV0 | MIIF_MACPRIV1)) != 0 && + (sc->sc_mii.mii_capabilities & BMSR_MEDIAMASK) != 0) + device_printf(dev, " "); + if ((sc->sc_mii.mii_flags & MIIF_MACPRIV0) != 0) { + ADD(IFM_MAKEWORD(IFM_ETHER, IFM_10_2, 0, sc->sc_mii.mii_inst), + 0); + PRINT("10base2/BNC"); } - - printf("\n"); + if ((sc->sc_mii.mii_flags & MIIF_MACPRIV1) != 0) { + ADD(IFM_MAKEWORD(IFM_ETHER, IFM_10_5, 0, sc->sc_mii.mii_inst), + 0); + PRINT("10base5/AUI"); + } + if ((sc->sc_mii.mii_capabilities & BMSR_MEDIAMASK) != 0) { + 
printf("%s", sep); + mii_phy_add_media(&sc->sc_mii); + } + if ((sc->sc_mii.mii_flags & (MIIF_MACPRIV0 | MIIF_MACPRIV1)) != 0 && + (sc->sc_mii.mii_capabilities & BMSR_MEDIAMASK) != 0) + printf("\n"); #undef ADD #undef PRINT MIIBUS_MEDIAINIT(sc->sc_mii.mii_dev); @@ -233,7 +240,7 @@ tlphy_service(struct mii_softc *self, struct mii_data *mii, int cmd) * an autonegotiation cycle, so there's no such * thing as "already in auto mode". */ - (void) tlphy_auto(sc); + (void)tlphy_auto(sc); break; case IFM_10_2: case IFM_10_5: @@ -244,9 +251,7 @@ tlphy_service(struct mii_softc *self, struct mii_data *mii, int cmd) default: PHY_WRITE(&sc->sc_mii, MII_TLPHY_CTRL, 0); DELAY(100000); - PHY_WRITE(&sc->sc_mii, MII_ANAR, - mii_anar(ife->ifm_media)); - PHY_WRITE(&sc->sc_mii, MII_BMCR, ife->ifm_data); + mii_phy_setmedia(&sc->sc_mii); } break; @@ -283,7 +288,7 @@ tlphy_service(struct mii_softc *self, struct mii_data *mii, int cmd) sc->sc_mii.mii_ticks = 0; mii_phy_reset(&sc->sc_mii); - tlphy_auto(sc); + (void)tlphy_auto(sc); return (0); } diff --git a/sys/dev/tl/if_tl.c b/sys/dev/tl/if_tl.c index 56f51dfa2431..a256552c0ab9 100644 --- a/sys/dev/tl/if_tl.c +++ b/sys/dev/tl/if_tl.c @@ -1104,12 +1104,11 @@ static int tl_attach(dev) device_t dev; { - int i; u_int16_t did, vid; struct tl_type *t; struct ifnet *ifp; struct tl_softc *sc; - int unit, error = 0, rid; + int error, flags, i, rid, unit; u_char eaddr[6]; vid = pci_get_vendor(dev); @@ -1207,10 +1206,9 @@ tl_attach(dev) bzero(sc->tl_ldata, sizeof(struct tl_list_data)); - sc->tl_dinfo = t; - if (t->tl_vid == COMPAQ_VENDORID || t->tl_vid == TI_VENDORID) + if (vid == COMPAQ_VENDORID || vid == TI_VENDORID) sc->tl_eeaddr = TL_EEPROM_EADDR; - if (t->tl_vid == OLICOM_VENDORID) + if (vid == OLICOM_VENDORID) sc->tl_eeaddr = TL_EEPROM_EADDR_OC; /* Reset the adapter. */ @@ -1241,7 +1239,7 @@ tl_attach(dev) * word. To make things even more confusing, neither 00:00:28 * nor 00:00:24 appear in the IEEE OUI database. 
*/ - if (sc->tl_dinfo->tl_vid == OLICOM_VENDORID) { + if (vid == OLICOM_VENDORID) { for (i = 0; i < ETHER_ADDR_LEN; i += 2) { u_int16_t *p; p = (u_int16_t *)&eaddr[i]; @@ -1279,6 +1277,20 @@ tl_attach(dev) * XXX mii_attach() can fail for reason different than * no PHYs found! */ + flags = 0; + if (vid == COMPAQ_VENDORID) { + if (did == COMPAQ_DEVICEID_NETEL_10_100_PROLIANT || + did == COMPAQ_DEVICEID_NETFLEX_3P_INTEGRATED || + did == COMPAQ_DEVICEID_NETFLEX_3P_BNC || + did == COMPAQ_DEVICEID_NETEL_10_T2_UTP_COAX) + flags |= MIIF_MACPRIV0; + if (did == COMPAQ_DEVICEID_NETEL_10 || + did == COMPAQ_DEVICEID_NETEL_10_100_DUAL || + did == COMPAQ_DEVICEID_NETFLEX_3P || + did == COMPAQ_DEVICEID_NETEL_10_100_EMBEDDED) + flags |= MIIF_MACPRIV1; + } else if (vid == OLICOM_VENDORID && did == OLICOM_DEVICEID_OC2183) + flags |= MIIF_MACPRIV0 | MIIF_MACPRIV1; if (mii_attach(dev, &sc->tl_miibus, ifp, tl_ifmedia_upd, tl_ifmedia_sts, BMSR_DEFCAPMASK, MII_PHY_ANY, MII_OFFSET_ANY, 0)) { struct ifmedia *ifm; diff --git a/sys/dev/tl/if_tlreg.h b/sys/dev/tl/if_tlreg.h index f0347e769ee0..4e340b1eee74 100644 --- a/sys/dev/tl/if_tlreg.h +++ b/sys/dev/tl/if_tlreg.h @@ -116,7 +116,6 @@ struct tl_softc { struct resource *tl_irq; struct resource *tl_res; device_t tl_miibus; - struct tl_type *tl_dinfo; /* ThunderLAN adapter info */ u_int8_t tl_eeaddr; struct tl_list_data *tl_ldata; /* TX/RX lists and mbufs */ struct tl_chain_data tl_cdata; From 4a1f2d1b35996dbc86d0091f35a4db548d546b39 Mon Sep 17 00:00:00 2001 From: Marius Strobl Date: Sun, 24 Oct 2010 12:59:43 +0000 Subject: [PATCH 24/65] - Given that as of r214264 all PHY drivers using mii(4) finally have been converted to use the mii_phy_add_media()/mii_phy_setmedia() pair instead of mii_add_media()/mii_anar() remove the latter. - Declare mii_media mii_media_table static as it shouldn't be used outside of mii_physubr.c. 
MFC after: never --- sys/dev/mii/mii_physubr.c | 95 +-------------------------------------- sys/dev/mii/miivar.h | 2 - 2 files changed, 1 insertion(+), 96 deletions(-) diff --git a/sys/dev/mii/mii_physubr.c b/sys/dev/mii/mii_physubr.c index 18d8ce87814c..015d53ce11ea 100644 --- a/sys/dev/mii/mii_physubr.c +++ b/sys/dev/mii/mii_physubr.c @@ -56,7 +56,7 @@ __FBSDID("$FreeBSD$"); /* * Media to register setting conversion table. Order matters. */ -const struct mii_media mii_media_table[MII_NMEDIA] = { +static const struct mii_media mii_media_table[MII_NMEDIA] = { /* None */ { BMCR_ISO, ANAR_CSMA, 0, }, @@ -286,99 +286,6 @@ mii_phy_update(struct mii_softc *sc, int cmd) } } -/* - * Given an ifmedia word, return the corresponding ANAR value. - */ -int -mii_anar(int media) -{ - int rv; - - switch (media & (IFM_TMASK|IFM_NMASK|IFM_FDX)) { - case IFM_ETHER|IFM_10_T: - rv = ANAR_10|ANAR_CSMA; - break; - case IFM_ETHER|IFM_10_T|IFM_FDX: - rv = ANAR_10_FD|ANAR_CSMA; - break; - case IFM_ETHER|IFM_100_TX: - rv = ANAR_TX|ANAR_CSMA; - break; - case IFM_ETHER|IFM_100_TX|IFM_FDX: - rv = ANAR_TX_FD|ANAR_CSMA; - break; - case IFM_ETHER|IFM_100_T4: - rv = ANAR_T4|ANAR_CSMA; - break; - default: - rv = 0; - break; - } - - return (rv); -} - -/* - * Initialize generic PHY media based on BMSR, called when a PHY is - * attached. We expect to be set up to print a comma-separated list - * of media names. Does not print a newline. 
- */ -void -mii_add_media(struct mii_softc *sc) -{ - struct mii_data *mii = sc->mii_pdata; - const char *sep = ""; - - if ((sc->mii_capabilities & BMSR_MEDIAMASK) == 0) { - printf("no media present"); - return; - } - -#define ADD(m, c) ifmedia_add(&mii->mii_media, (m), (c), NULL) -#define PRINT(s) printf("%s%s", sep, s); sep = ", " - - if (sc->mii_capabilities & BMSR_10THDX) { - ADD(IFM_MAKEWORD(IFM_ETHER, IFM_10_T, 0, sc->mii_inst), 0); - PRINT("10baseT"); - } - if (sc->mii_capabilities & BMSR_10TFDX) { - ADD(IFM_MAKEWORD(IFM_ETHER, IFM_10_T, IFM_FDX, sc->mii_inst), - BMCR_FDX); - PRINT("10baseT-FDX"); - } - if (sc->mii_capabilities & BMSR_100TXHDX) { - ADD(IFM_MAKEWORD(IFM_ETHER, IFM_100_TX, 0, sc->mii_inst), - BMCR_S100); - PRINT("100baseTX"); - } - if (sc->mii_capabilities & BMSR_100TXFDX) { - ADD(IFM_MAKEWORD(IFM_ETHER, IFM_100_TX, IFM_FDX, sc->mii_inst), - BMCR_S100|BMCR_FDX); - PRINT("100baseTX-FDX"); - } - if (sc->mii_capabilities & BMSR_100T4) { - /* - * XXX How do you enable 100baseT4? I assume we set - * XXX BMCR_S100 and then assume the PHYs will take - * XXX watever action is necessary to switch themselves - * XXX into T4 mode. - */ - ADD(IFM_MAKEWORD(IFM_ETHER, IFM_100_T4, 0, sc->mii_inst), - BMCR_S100); - PRINT("100baseT4"); - } - if (sc->mii_capabilities & BMSR_ANEG) { - ADD(IFM_MAKEWORD(IFM_ETHER, IFM_AUTO, 0, sc->mii_inst), - BMCR_AUTOEN); - PRINT("auto"); - } - - - -#undef ADD -#undef PRINT -} - /* * Initialize generic PHY media based on BMSR, called when a PHY is * attached. 
We expect to be set up to print a comma-separated list diff --git a/sys/dev/mii/miivar.h b/sys/dev/mii/miivar.h index 78d152a248e3..15a3f3f470dc 100644 --- a/sys/dev/mii/miivar.h +++ b/sys/dev/mii/miivar.h @@ -227,12 +227,10 @@ int miibus_detach(device_t); int mii_attach(device_t, device_t *, struct ifnet *, ifm_change_cb_t, ifm_stat_cb_t, int, int, int, int); -int mii_anar(int); void mii_down(struct mii_data *); int mii_mediachg(struct mii_data *); void mii_tick(struct mii_data *); void mii_pollstat(struct mii_data *); -void mii_add_media(struct mii_softc *); void mii_phy_add_media(struct mii_softc *); int mii_phy_auto(struct mii_softc *); From 3afd23533af9d6dd2dd9a5f7c1790c92a1e4a9cb Mon Sep 17 00:00:00 2001 From: Hans Petter Selasky Date: Sun, 24 Oct 2010 13:48:11 +0000 Subject: [PATCH 25/65] Remove stray semicolon. Submitted by: arundel --- usr.sbin/usbconfig/usbconfig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/usr.sbin/usbconfig/usbconfig.c b/usr.sbin/usbconfig/usbconfig.c index 2bd3668223bf..a1fa694b2aaf 100644 --- a/usr.sbin/usbconfig/usbconfig.c +++ b/usr.sbin/usbconfig/usbconfig.c @@ -640,7 +640,7 @@ main(int argc, char **argv) opt->bus = unit; opt->addr = addr; opt->got_bus = 1; -; opt->got_addr = 1; + opt->got_addr = 1; n++; break; From 16dc6e122a3f31e3068d041403cdb88782ed82fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulrich=20Sp=C3=B6rlein?= Date: Sun, 24 Oct 2010 15:31:41 +0000 Subject: [PATCH 26/65] flex: add missing ifdef magic to create/hide prototypes This unbreaks build for some software with higher WARNS flags. 
Reviewed by: rpaulo PR: bin/139319 (parts already committed in previous revision) --- usr.bin/lex/flex.skl | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/usr.bin/lex/flex.skl b/usr.bin/lex/flex.skl index 69d9ca2cac4c..ac8eac2d9c37 100644 --- a/usr.bin/lex/flex.skl +++ b/usr.bin/lex/flex.skl @@ -747,7 +747,11 @@ void yyFlexLexer::LexerOutput( const char* buf, int size ) */ %- +#ifdef YY_USE_PROTOS static int yy_get_next_buffer(void) +#else +static int yy_get_next_buffer() +#endif %+ int yyFlexLexer::yy_get_next_buffer() %* @@ -883,7 +887,11 @@ int yyFlexLexer::yy_get_next_buffer() /* yy_get_previous_state - get the state just before the EOB char was reached */ %- +#ifdef YY_USE_PROTOS static yy_state_type yy_get_previous_state(void) +#else +static yy_state_type yy_get_previous_state() +#endif %+ yy_state_type yyFlexLexer::yy_get_previous_state() %* @@ -983,7 +991,11 @@ void yyFlexLexer::yyunput( int c, char* yy_bp ) #ifdef __cplusplus static int yyinput() #else +#ifdef YY_USE_PROTOS static int input(void) +#else +static int input() +#endif #endif %+ int yyFlexLexer::yyinput() @@ -1055,7 +1067,9 @@ int yyFlexLexer::yyinput() return c; } -#endif /* ifndef YY_NO_INPUT */ +%- +#endif /* ifndef YY_NO_INPUT */ +%* %- @@ -1404,7 +1418,11 @@ void yyFlexLexer::yy_push_state( int new_state ) #ifndef YY_NO_POP_STATE %- +#ifdef YY_USE_PROTOS +static void yy_pop_state(void) +#else static void yy_pop_state() +#endif %+ void yyFlexLexer::yy_pop_state() %* @@ -1419,7 +1437,11 @@ void yyFlexLexer::yy_pop_state() #ifndef YY_NO_TOP_STATE %- +#ifdef YY_USE_PROTOS +static int yy_top_state(void) +#else static int yy_top_state() +#endif %+ int yyFlexLexer::yy_top_state() %* From 2964aeb34a1b0b2a1fa3c3aa34d253a99e7fc71a Mon Sep 17 00:00:00 2001 From: Pawel Jakub Dawidek Date: Sun, 24 Oct 2010 15:38:58 +0000 Subject: [PATCH 27/65] Load geom_gate.ko module after parsing arguments. 
MFC after: 3 days --- sbin/hastd/hastd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sbin/hastd/hastd.c b/sbin/hastd/hastd.c index a025d6b8aa4e..cb61811246c0 100644 --- a/sbin/hastd/hastd.c +++ b/sbin/hastd/hastd.c @@ -701,8 +701,6 @@ main(int argc, char *argv[]) int debuglevel; sigset_t mask; - g_gate_load(); - foreground = false; debuglevel = 0; pidfile = HASTD_PIDFILE; @@ -736,6 +734,8 @@ main(int argc, char *argv[]) pjdlog_debug_set(debuglevel); + g_gate_load(); + pfh = pidfile_open(pidfile, 0600, &otherpid); if (pfh == NULL) { if (errno == EEXIST) { From 584a9bc3f8c2d31008b81ed2e15db96d2200155b Mon Sep 17 00:00:00 2001 From: Pawel Jakub Dawidek Date: Sun, 24 Oct 2010 15:41:23 +0000 Subject: [PATCH 28/65] Plug memory leaks. Found with: valgrind MFC after: 3 days --- sbin/hastd/parse.y | 24 ++++++++++++++++++++++++ sbin/hastd/primary.c | 1 + sbin/hastd/synch.h | 2 ++ 3 files changed, 27 insertions(+) diff --git a/sbin/hastd/parse.y b/sbin/hastd/parse.y index ca575cf1238b..d37e6db4960d 100644 --- a/sbin/hastd/parse.y +++ b/sbin/hastd/parse.y @@ -264,6 +264,7 @@ control_statement: CONTROL STR sizeof(depth0_control)) >= sizeof(depth0_control)) { pjdlog_error("control argument is too long."); + free($2); return (1); } break; @@ -274,12 +275,14 @@ control_statement: CONTROL STR sizeof(lconfig->hc_controladdr)) >= sizeof(lconfig->hc_controladdr)) { pjdlog_error("control argument is too long."); + free($2); return (1); } break; default: assert(!"control at wrong depth level"); } + free($2); } ; @@ -291,6 +294,7 @@ listen_statement: LISTEN STR sizeof(depth0_listen)) >= sizeof(depth0_listen)) { pjdlog_error("listen argument is too long."); + free($2); return (1); } break; @@ -301,12 +305,14 @@ listen_statement: LISTEN STR sizeof(lconfig->hc_listenaddr)) >= sizeof(lconfig->hc_listenaddr)) { pjdlog_error("listen argument is too long."); + free($2); return (1); } break; default: assert(!"listen at wrong depth level"); } + free($2); } ; @@ -357,6 
+363,7 @@ exec_statement: EXEC STR if (strlcpy(depth0_exec, $2, sizeof(depth0_exec)) >= sizeof(depth0_exec)) { pjdlog_error("Exec path is too long."); + free($2); return (1); } break; @@ -367,12 +374,14 @@ exec_statement: EXEC STR sizeof(curres->hr_exec)) >= sizeof(curres->hr_exec)) { pjdlog_error("Exec path is too long."); + free($2); return (1); } break; default: assert(!"exec at wrong depth level"); } + free($2); } ; @@ -386,6 +395,7 @@ node_start: STR { switch (isitme($1)) { case -1: + free($1); return (1); case 0: break; @@ -395,6 +405,7 @@ node_start: STR default: assert(!"invalid isitme() return value"); } + free($1); } ; @@ -482,14 +493,17 @@ resource_start: STR curres = calloc(1, sizeof(*curres)); if (curres == NULL) { pjdlog_error("Unable to allocate memory for resource."); + free($1); return (1); } if (strlcpy(curres->hr_name, $1, sizeof(curres->hr_name)) >= sizeof(curres->hr_name)) { pjdlog_error("Resource name is too long."); + free($1); return (1); } + free($1); curres->hr_role = HAST_ROLE_INIT; curres->hr_previous_role = HAST_ROLE_INIT; curres->hr_replication = -1; @@ -530,6 +544,7 @@ name_statement: NAME STR sizeof(depth1_provname)) >= sizeof(depth1_provname)) { pjdlog_error("name argument is too long."); + free($2); return (1); } break; @@ -541,12 +556,14 @@ name_statement: NAME STR sizeof(curres->hr_provname)) >= sizeof(curres->hr_provname)) { pjdlog_error("name argument is too long."); + free($2); return (1); } break; default: assert(!"name at wrong depth level"); } + free($2); } ; @@ -558,6 +575,7 @@ local_statement: LOCAL STR sizeof(depth1_localpath)) >= sizeof(depth1_localpath)) { pjdlog_error("local argument is too long."); + free($2); return (1); } break; @@ -569,12 +587,14 @@ local_statement: LOCAL STR sizeof(curres->hr_localpath)) >= sizeof(curres->hr_localpath)) { pjdlog_error("local argument is too long."); + free($2); return (1); } break; default: assert(!"local at wrong depth level"); } + free($2); } ; @@ -589,6 +609,7 @@ 
resource_node_start: STR if (curres != NULL) { switch (isitme($1)) { case -1: + free($1); return (1); case 0: break; @@ -599,6 +620,7 @@ resource_node_start: STR assert(!"invalid isitme() return value"); } } + free($1); } ; @@ -624,8 +646,10 @@ remote_statement: REMOTE STR sizeof(curres->hr_remoteaddr)) >= sizeof(curres->hr_remoteaddr)) { pjdlog_error("remote argument is too long."); + free($2); return (1); } } + free($2); } ; diff --git a/sbin/hastd/primary.c b/sbin/hastd/primary.c index aabbecf33d97..5fb903c90717 100644 --- a/sbin/hastd/primary.c +++ b/sbin/hastd/primary.c @@ -646,6 +646,7 @@ init_remote(struct hast_resource *res, struct proto_conn **inp, */ (void)hast_activemap_flush(res); } + nv_free(nvin); pjdlog_info("Connected to %s.", res->hr_remoteaddr); if (inp != NULL && outp != NULL) { *inp = in; diff --git a/sbin/hastd/synch.h b/sbin/hastd/synch.h index 1dda49d7eb71..cf388d7d0cd6 100644 --- a/sbin/hastd/synch.h +++ b/sbin/hastd/synch.h @@ -140,6 +140,8 @@ cv_init(pthread_cond_t *cv) assert(error == 0); error = pthread_cond_init(cv, &attr); assert(error == 0); + error = pthread_condattr_destroy(&attr); + assert(error == 0); } static __inline void cv_wait(pthread_cond_t *cv, pthread_mutex_t *lock) From d7be7905aee45a47eb1b8c25a037c5e025930d74 Mon Sep 17 00:00:00 2001 From: Pawel Jakub Dawidek Date: Sun, 24 Oct 2010 15:42:16 +0000 Subject: [PATCH 29/65] Plug memory leak. 
MFC after: 3 days --- sbin/hastd/secondary.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sbin/hastd/secondary.c b/sbin/hastd/secondary.c index 403c5b214e67..d494dfa465ad 100644 --- a/sbin/hastd/secondary.c +++ b/sbin/hastd/secondary.c @@ -323,6 +323,8 @@ init_remote(struct hast_resource *res, struct nv *nvin) nv_free(nvout); exit(EX_TEMPFAIL); } + if (map != NULL) + free(map); nv_free(nvout); if (res->hr_secondary_localcnt > res->hr_primary_remotecnt && res->hr_primary_localcnt > res->hr_secondary_remotecnt) { From 1f39b279465b71760c2907a426c50902ec973d5c Mon Sep 17 00:00:00 2001 From: Pawel Jakub Dawidek Date: Sun, 24 Oct 2010 15:44:23 +0000 Subject: [PATCH 30/65] Simplify code a bit. MFC after: 3 days --- sbin/hastd/secondary.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sbin/hastd/secondary.c b/sbin/hastd/secondary.c index d494dfa465ad..6d89f3559704 100644 --- a/sbin/hastd/secondary.c +++ b/sbin/hastd/secondary.c @@ -318,10 +318,8 @@ init_remote(struct hast_resource *res, struct nv *nvin) (uintmax_t)res->hr_secondary_remotecnt); } if (hast_proto_send(res, res->hr_remotein, nvout, map, mapsize) < 0) { - pjdlog_errno(LOG_WARNING, "Unable to send activemap to %s", + pjdlog_exit(EX_TEMPFAIL, "Unable to send activemap to %s", res->hr_remoteaddr); - nv_free(nvout); - exit(EX_TEMPFAIL); } if (map != NULL) free(map); From f36575b51cb531d9674869c8a4039526d1c384f2 Mon Sep 17 00:00:00 2001 From: Jaakko Heinonen Date: Sun, 24 Oct 2010 15:56:21 +0000 Subject: [PATCH 31/65] Document make_dev_p(9). 
Reviewed by: brueffer, kib --- share/man/man9/Makefile | 3 +- share/man/man9/make_dev.9 | 63 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile index 79ba70b83b34..e6a490a9d98a 100644 --- a/share/man/man9/Makefile +++ b/share/man/man9/Makefile @@ -775,7 +775,8 @@ MLINKS+=make_dev.9 destroy_dev.9 \ make_dev.9 dev_depends.9 \ make_dev.9 make_dev_alias.9 \ make_dev.9 make_dev_cred.9 \ - make_dev.9 make_dev_credf.9 + make_dev.9 make_dev_credf.9 \ + make_dev.9 make_dev_p.9 MLINKS+=malloc.9 free.9 \ malloc.9 MALLOC_DECLARE.9 \ malloc.9 MALLOC_DEFINE.9 \ diff --git a/share/man/man9/make_dev.9 b/share/man/man9/make_dev.9 index 2e35f5fc81bc..d846d57a1419 100644 --- a/share/man/man9/make_dev.9 +++ b/share/man/man9/make_dev.9 @@ -24,13 +24,14 @@ .\" .\" $FreeBSD$ .\" -.Dd October 7, 2010 +.Dd October 24, 2010 .Dt MAKE_DEV 9 .Os .Sh NAME .Nm make_dev , .Nm make_dev_cred , .Nm make_dev_credf , +.Nm make_dev_p , .Nm make_dev_alias , .Nm destroy_dev , .Nm destroy_dev_sched , @@ -49,6 +50,8 @@ and DEVFS registration for devices .Fn make_dev_cred "struct cdevsw *cdevsw" "int unit" "struct ucred *cr" "uid_t uid" "gid_t gid" "int perms" "const char *fmt" ... .Ft struct cdev * .Fn make_dev_credf "int flags" "struct cdevsw *cdevsw" "int unit" "struct ucred *cr" "uid_t uid" "gid_t gid" "int perms" "const char *fmt" ... +.Ft int +.Fn make_dev_p "int flags" "struct cdev **cdev" "struct cdevsw *devsw" "struct ucred *cr" "uid_t uid" "gid_t gid" "int mode" "const char *fmt" ... .Ft struct cdev * .Fn make_dev_alias "struct cdev *pdev" "const char *fmt" ... .Ft void @@ -128,20 +131,22 @@ member of the initialized The .Va flags argument alters the operation of -.Fn make_dev_credf . +.Fn make_dev_credf +or +.Fn make_dev_p . 
The following values are currently accepted: .Pp .Bl -tag -width "MAKEDEV_CHECKNAME" -compact -offset indent .It MAKEDEV_REF reference the created device .It MAKEDEV_NOWAIT -do not sleep, may return NULL +do not sleep, the call may fail .It MAKEDEV_WAITOK allow the function to sleep to satisfy malloc .It MAKEDEV_ETERNAL created device will be never destroyed .It MAKEDEV_CHECKNAME -return NULL if the device name is invalid or already exists +return an error if the device name is invalid or already exists .El .Pp The @@ -190,6 +195,14 @@ make_dev_credf(0, cdevsw, unit, NULL, uid, gid, perms, fmt, ...); .Ed .Pp The +.Fn make_dev_p +function is similar to +.Fn make_dev_credf +but it may return an error number and takes a pointer to the resulting +.Ft *cdev +as an argument. +.Pp +The .Fn make_dev_alias function takes the returned .Ft cdev @@ -293,6 +306,44 @@ is called for all instantiated devices, but need to postpone module unload until .Fn destroy_dev is actually finished for all of them. +.Sh RETURN VALUES +If successful, +.Fn make_dev_p +will return 0, otherwise it will return an error. +If successful, +.Fn make_dev_credf +will return a valid +.Fa cdev +pointer, otherwise it will return +.Dv NULL . +.Sh ERRORS +The +.Fn make_dev_p +call will fail and the device will be not registered if: +.Bl -tag -width Er +.It Bq Er ENOMEM +The +.Dv MAKEDEV_NOWAIT +flags was specified and a memory allocation request could not be satisfied. +.It Bq Er ENAMETOOLONG +The +.Dv MAKEDEV_CHECKNAME +flags was specified and the provided device name is longer than +.Dv SPECNAMELEN . +.It Bq Er EINVAL +The +.Dv MAKEDEV_CHECKNAME +flags was specified and the provided device name is empty, contains a +.Qq \&. +or +.Qq .. +path component or ends with +.Ql / . +.It Bq Er EEXIST +The +.Dv MAKEDEV_CHECKNAME +flags was specified and the provided device name already exists. +.El .Pp .Sh SEE ALSO .Xr devctl 4 , @@ -320,3 +371,7 @@ The functions .Fn destroy_dev_sched_cb first appeared in .Fx 7.0 . 
+The function +.Fn make_dev_p +first appeared in +.Fx 8.2 . From 95adbc959bdf18e1f294d64e7a25339462690fb9 Mon Sep 17 00:00:00 2001 From: Jaakko Heinonen Date: Sun, 24 Oct 2010 16:10:32 +0000 Subject: [PATCH 32/65] Use .Dv with NULL. --- share/man/man9/make_dev.9 | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/share/man/man9/make_dev.9 b/share/man/man9/make_dev.9 index d846d57a1419..54b3ae1c9634 100644 --- a/share/man/man9/make_dev.9 +++ b/share/man/man9/make_dev.9 @@ -271,7 +271,9 @@ After .Fn destroy_dev is finished, and if the supplied .Fa cb -is not NULL, the callback +is not +.Dv NULL , +the callback .Fa cb is called, with argument .Fa arg . From fd104c151b31d66427641e6c99a5f837eed63eb4 Mon Sep 17 00:00:00 2001 From: Rebecca Cran Date: Sun, 24 Oct 2010 16:31:57 +0000 Subject: [PATCH 33/65] Mostly revert r203420, and add similar functionality into ada(4) since the existing code caused problems with some SCSI controllers. A new sysctl kern.cam.ada.spindown_shutdown has been added that controls whether or not to spin-down disks when shutting down. Spinning down the disks unloads/parks the heads - this is much better than removing power when the disk is still spinning because otherwise an Emergency Unload occurs which may cause damage to the actuator. PR: kern/140752 Submitted by: olli Reviewed by: arundel Discussed with: mav MFC after: 2 weeks --- share/man/man4/ada.4 | 3 ++ sys/cam/ata/ata_da.c | 64 ++++++++++++++++++++++++++- sys/cam/cam_xpt.c | 93 ---------------------------------------- sys/kern/kern_shutdown.c | 2 +- 4 files changed, 67 insertions(+), 95 deletions(-) diff --git a/share/man/man4/ada.4 b/share/man/man4/ada.4 index cba892b03c4d..d16a5eac197c 100644 --- a/share/man/man4/ada.4 +++ b/share/man/man4/ada.4 @@ -118,6 +118,9 @@ This variable determines how long the driver will wait before timing out an outstanding command. The units for this value are seconds, and the default is currently 30 seconds. 
+.It kern.cam.ada.spindown_shutdown +.Pp +This variable determines whether to spin-down disks when shutting down. .El .Sh FILES .Bl -tag -width ".Pa /dev/ada*" -compact diff --git a/sys/cam/ata/ata_da.c b/sys/cam/ata/ata_da.c index 8a279311b4a3..44d2fa6225fe 100644 --- a/sys/cam/ata/ata_da.c +++ b/sys/cam/ata/ata_da.c @@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #endif /* _KERNEL */ @@ -79,7 +80,8 @@ typedef enum { ADA_FLAG_CAN_TRIM = 0x080, ADA_FLAG_OPEN = 0x100, ADA_FLAG_SCTX_INIT = 0x200, - ADA_FLAG_CAN_CFA = 0x400 + ADA_FLAG_CAN_CFA = 0x400, + ADA_FLAG_CAN_POWERMGT = 0x800 } ada_flags; typedef enum { @@ -180,6 +182,10 @@ static void adashutdown(void *arg, int howto); #define ADA_DEFAULT_SEND_ORDERED 1 #endif +#ifndef ADA_DEFAULT_SPINDOWN_SHUTDOWN +#define ADA_DEFAULT_SPINDOWN_SHUTDOWN 1 +#endif + /* * Most platforms map firmware geometry to actual, but some don't. If * not overridden, default to nothing. @@ -191,6 +197,7 @@ static void adashutdown(void *arg, int howto); static int ada_retry_count = ADA_DEFAULT_RETRY; static int ada_default_timeout = ADA_DEFAULT_TIMEOUT; static int ada_send_ordered = ADA_DEFAULT_SEND_ORDERED; +static int ada_spindown_shutdown = ADA_DEFAULT_SPINDOWN_SHUTDOWN; SYSCTL_NODE(_kern_cam, OID_AUTO, ada, CTLFLAG_RD, 0, "CAM Direct Access Disk driver"); @@ -203,6 +210,9 @@ TUNABLE_INT("kern.cam.ada.default_timeout", &ada_default_timeout); SYSCTL_INT(_kern_cam_ada, OID_AUTO, ada_send_ordered, CTLFLAG_RW, &ada_send_ordered, 0, "Send Ordered Tags"); TUNABLE_INT("kern.cam.ada.ada_send_ordered", &ada_send_ordered); +SYSCTL_INT(_kern_cam_ada, OID_AUTO, spindown_shutdown, CTLFLAG_RW, + &ada_spindown_shutdown, 0, "Spin down upon shutdown"); +TUNABLE_INT("kern.cam.ada.spindown_shutdown", &ada_spindown_shutdown); /* * ADA_ORDEREDTAG_INTERVAL determines how often, relative @@ -665,6 +675,8 @@ adaregister(struct cam_periph *periph, void *arg) softc->flags |= ADA_FLAG_CAN_48BIT; if 
(cgd->ident_data.support.command2 & ATA_SUPPORT_FLUSHCACHE) softc->flags |= ADA_FLAG_CAN_FLUSHCACHE; + if (cgd->ident_data.support.command1 & ATA_SUPPORT_POWERMGT) + softc->flags |= ADA_FLAG_CAN_POWERMGT; if (cgd->ident_data.satacapabilities & ATA_SUPPORT_NCQ && cgd->inq_flags & SID_CmdQue) softc->flags |= ADA_FLAG_CAN_NCQ; @@ -1227,6 +1239,56 @@ adashutdown(void * arg, int howto) /*getcount_only*/0); cam_periph_unlock(periph); } + + if (ada_spindown_shutdown == 0 || + (howto & (RB_HALT | RB_POWEROFF)) == 0) + return; + + TAILQ_FOREACH(periph, &adadriver.units, unit_links) { + union ccb ccb; + + /* If we paniced with lock held - not recurse here. */ + if (cam_periph_owned(periph)) + continue; + cam_periph_lock(periph); + softc = (struct ada_softc *)periph->softc; + /* + * We only spin-down the drive if it is capable of it.. + */ + if ((softc->flags & ADA_FLAG_CAN_POWERMGT) == 0) { + cam_periph_unlock(periph); + continue; + } + + if (bootverbose) + xpt_print(periph->path, "spin-down\n"); + + xpt_setup_ccb(&ccb.ccb_h, periph->path, CAM_PRIORITY_NORMAL); + + ccb.ccb_h.ccb_state = ADA_CCB_DUMP; + cam_fill_ataio(&ccb.ataio, + 1, + adadone, + CAM_DIR_NONE, + 0, + NULL, + 0, + ada_default_timeout*1000); + + ata_28bit_cmd(&ccb.ataio, ATA_STANDBY_IMMEDIATE, 0, 0, 0); + xpt_polled_action(&ccb); + + if ((ccb.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) + xpt_print(periph->path, "Spin-down disk failed\n"); + + if ((ccb.ccb_h.status & CAM_DEV_QFRZN) != 0) + cam_release_devq(ccb.ccb_h.path, + /*relsim_flags*/0, + /*reduction*/0, + /*timeout*/0, + /*getcount_only*/0); + cam_periph_unlock(periph); + } } #endif /* _KERNEL */ diff --git a/sys/cam/cam_xpt.c b/sys/cam/cam_xpt.c index 7023015756e8..dc16e9f2db87 100644 --- a/sys/cam/cam_xpt.c +++ b/sys/cam/cam_xpt.c @@ -39,7 +39,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include @@ -153,10 +152,6 @@ static struct xpt_softc xsoftc; TUNABLE_INT("kern.cam.boot_delay", &xsoftc.boot_delay); 
SYSCTL_INT(_kern_cam, OID_AUTO, boot_delay, CTLFLAG_RDTUN, &xsoftc.boot_delay, 0, "Bus registration wait time"); -static int xpt_power_down = 0; -TUNABLE_INT("kern.cam.power_down", &xpt_power_down); -SYSCTL_INT(_kern_cam, OID_AUTO, power_down, CTLFLAG_RW, - &xpt_power_down, 0, "Power down devices on shutdown"); /* Queues for our software interrupt handler */ typedef TAILQ_HEAD(cam_isrq, ccb_hdr) cam_isrq_t; @@ -250,7 +245,6 @@ static struct cam_ed* xpt_find_device(struct cam_et *target, lun_id_t lun_id); static void xpt_config(void *arg); static xpt_devicefunc_t xptpassannouncefunc; -static void xpt_shutdown(void *arg, int howto); static void xptaction(struct cam_sim *sim, union ccb *work_ccb); static void xptpoll(struct cam_sim *sim); static void camisr(void *); @@ -4538,12 +4532,6 @@ xpt_config(void *arg) #endif /* CAM_DEBUG_BUS */ #endif /* CAMDEBUG */ - /* Register our shutdown event handler */ - if ((EVENTHANDLER_REGISTER(shutdown_final, xpt_shutdown, - NULL, SHUTDOWN_PRI_FIRST)) == NULL) { - printf("xpt_config: failed to register shutdown event.\n"); - } - periphdriver_init(1); xpt_hold_boot(); callout_init(&xsoftc.boot_callout, 1); @@ -4625,87 +4613,6 @@ xpt_finishconfig_task(void *context, int pending) free(context, M_CAMXPT); } -/* - * Power down all devices when we are going to power down the system. - */ -static void -xpt_shutdown_dev_done(struct cam_periph *periph, union ccb *done_ccb) -{ - - /* No-op. We're polling. */ - return; -} - -static int -xpt_shutdown_dev(struct cam_ed *device, void *arg) -{ - union ccb ccb; - struct cam_path path; - - if (device->flags & CAM_DEV_UNCONFIGURED) - return (1); - - if (device->protocol == PROTO_ATA) { - /* Only power down device if it supports power management. 
*/ - if ((device->ident_data.support.command1 & - ATA_SUPPORT_POWERMGT) == 0) - return (1); - } else if (device->protocol != PROTO_SCSI) - return (1); - - xpt_compile_path(&path, - NULL, - device->target->bus->path_id, - device->target->target_id, - device->lun_id); - xpt_setup_ccb(&ccb.ccb_h, &path, CAM_PRIORITY_NORMAL); - if (device->protocol == PROTO_ATA) { - cam_fill_ataio(&ccb.ataio, - 1, - xpt_shutdown_dev_done, - CAM_DIR_NONE, - 0, - NULL, - 0, - 30*1000); - ata_28bit_cmd(&ccb.ataio, ATA_SLEEP, 0, 0, 0); - } else { - scsi_start_stop(&ccb.csio, - /*retries*/1, - xpt_shutdown_dev_done, - MSG_SIMPLE_Q_TAG, - /*start*/FALSE, - /*load/eject*/FALSE, - /*immediate*/TRUE, - SSD_FULL_SIZE, - /*timeout*/50*1000); - } - xpt_polled_action(&ccb); - - if ((ccb.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) - xpt_print(&path, "Device power down failed\n"); - if ((ccb.ccb_h.status & CAM_DEV_QFRZN) != 0) - cam_release_devq(ccb.ccb_h.path, - /*relsim_flags*/0, - /*reduction*/0, - /*timeout*/0, - /*getcount_only*/0); - xpt_release_path(&path); - return (1); -} - -static void -xpt_shutdown(void * arg, int howto) -{ - - if (!xpt_power_down) - return; - if ((howto & RB_POWEROFF) == 0) - return; - - xpt_for_all_devices(xpt_shutdown_dev, NULL); -} - cam_status xpt_register_async(int event, ac_callback_t *cbfunc, void *cbarg, struct cam_path *path) diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c index c20a1c630fbd..9553a3aba3a7 100644 --- a/sys/kern/kern_shutdown.c +++ b/sys/kern/kern_shutdown.c @@ -144,7 +144,7 @@ shutdown_conf(void *unused) { EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL, - SHUTDOWN_PRI_FIRST + 100); + SHUTDOWN_PRI_FIRST); EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL, SHUTDOWN_PRI_LAST + 100); EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL, From 281fb05e8339bdd4d934cf901556593eed96acdf Mon Sep 17 00:00:00 2001 From: Jilles Tjoelker Date: Sun, 24 Oct 2010 16:55:17 +0000 Subject: [PATCH 34/65] sh: Add some 
testcases for alias expansion. --- tools/regression/bin/sh/parser/alias1.0 | 5 +++++ tools/regression/bin/sh/parser/alias2.0 | 6 ++++++ 2 files changed, 11 insertions(+) create mode 100644 tools/regression/bin/sh/parser/alias1.0 create mode 100644 tools/regression/bin/sh/parser/alias2.0 diff --git a/tools/regression/bin/sh/parser/alias1.0 b/tools/regression/bin/sh/parser/alias1.0 new file mode 100644 index 000000000000..75dd9ab9b8f1 --- /dev/null +++ b/tools/regression/bin/sh/parser/alias1.0 @@ -0,0 +1,5 @@ +# $FreeBSD$ + +alias alias0=exit +eval 'alias0 0' +exit 1 diff --git a/tools/regression/bin/sh/parser/alias2.0 b/tools/regression/bin/sh/parser/alias2.0 new file mode 100644 index 000000000000..ae99b8a588c2 --- /dev/null +++ b/tools/regression/bin/sh/parser/alias2.0 @@ -0,0 +1,6 @@ +# $FreeBSD$ + +alias alias0=exit +x=alias0 +eval 'case $x in alias0) exit 0;; esac' +exit 1 From ba08f69b5c299309a85231190035d757730c79c9 Mon Sep 17 00:00:00 2001 From: Jilles Tjoelker Date: Sun, 24 Oct 2010 17:06:49 +0000 Subject: [PATCH 35/65] sh: Change ! within a pipeline to start a new pipeline instead. This is how ksh93 treats ! within a pipeline and makes the ! in a | ! b | c negate the exit status of the pipeline, as if it were a | { ! b | c; } Side effect: something like f() ! a is now a syntax error, because a function definition takes a command, not a pipeline. 
Exp-run done by: pav (with some other sh(1) changes) --- bin/sh/parser.c | 30 ++++++++-------------- tools/regression/bin/sh/parser/pipe-not1.0 | 3 +++ 2 files changed, 14 insertions(+), 19 deletions(-) create mode 100644 tools/regression/bin/sh/parser/pipe-not1.0 diff --git a/bin/sh/parser.c b/bin/sh/parser.c index 018b6988b4c5..6c504e5dfc1d 100644 --- a/bin/sh/parser.c +++ b/bin/sh/parser.c @@ -328,7 +328,7 @@ pipeline(void) { union node *n1, *n2, *pipenode; struct nodelist *lp, *prev; - int negate; + int negate, t; negate = 0; checkkwd = 2; @@ -347,7 +347,13 @@ pipeline(void) do { prev = lp; lp = (struct nodelist *)stalloc(sizeof (struct nodelist)); - lp->n = command(); + checkkwd = 2; + t = readtoken(); + tokpushback++; + if (t == TNOT) + lp->n = pipeline(); + else + lp->n = command(); prev->next = lp; } while (readtoken() == TPIPE); lp->next = NULL; @@ -372,7 +378,7 @@ command(void) union node *ap, **app; union node *cp, **cpp; union node *redir, **rpp; - int t, negate = 0; + int t; checkkwd = 2; redir = NULL; @@ -387,12 +393,6 @@ command(void) } tokpushback++; - while (readtoken() == TNOT) { - TRACE(("command: TNOT recognized\n")); - negate = !negate; - } - tokpushback++; - switch (readtoken()) { case TIF: n1 = (union node *)stalloc(sizeof (struct nif)); @@ -573,7 +573,7 @@ TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : "")); case TRP: tokpushback++; n1 = simplecmd(rpp, redir); - goto checkneg; + return n1; default: synexpect(-1); } @@ -596,15 +596,7 @@ TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? 
wordtext : "")); n1->nredir.redirect = redir; } -checkneg: - if (negate) { - n2 = (union node *)stalloc(sizeof (struct nnot)); - n2->type = NNOT; - n2->nnot.com = n1; - return n2; - } - else - return n1; + return n1; } diff --git a/tools/regression/bin/sh/parser/pipe-not1.0 b/tools/regression/bin/sh/parser/pipe-not1.0 new file mode 100644 index 000000000000..9842ff0afd05 --- /dev/null +++ b/tools/regression/bin/sh/parser/pipe-not1.0 @@ -0,0 +1,3 @@ +# $FreeBSD$ + +: | ! : | false From 3dea75d2a838f8c14b75df77b42e46b24fc8268c Mon Sep 17 00:00:00 2001 From: Pawel Jakub Dawidek Date: Sun, 24 Oct 2010 17:22:34 +0000 Subject: [PATCH 36/65] Move all NV defines into nv.c, they are not used externally thus there is no need to make them visible from outside. MFC after: 3 days --- sbin/hastd/nv.c | 27 +++++++++++++++++++++++++++ sbin/hastd/nv.h | 27 --------------------------- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/sbin/hastd/nv.c b/sbin/hastd/nv.c index 49b0cbbcb847..a0de09adb0d5 100644 --- a/sbin/hastd/nv.c +++ b/sbin/hastd/nv.c @@ -46,6 +46,33 @@ __FBSDID("$FreeBSD$"); #include #include +#define NV_TYPE_INT8 1 +#define NV_TYPE_UINT8 2 +#define NV_TYPE_INT16 3 +#define NV_TYPE_UINT16 4 +#define NV_TYPE_INT32 5 +#define NV_TYPE_UINT32 6 +#define NV_TYPE_INT64 7 +#define NV_TYPE_UINT64 8 +#define NV_TYPE_INT8_ARRAY 9 +#define NV_TYPE_UINT8_ARRAY 10 +#define NV_TYPE_INT16_ARRAY 11 +#define NV_TYPE_UINT16_ARRAY 12 +#define NV_TYPE_INT32_ARRAY 13 +#define NV_TYPE_UINT32_ARRAY 14 +#define NV_TYPE_INT64_ARRAY 15 +#define NV_TYPE_UINT64_ARRAY 16 +#define NV_TYPE_STRING 17 + +#define NV_TYPE_MASK 0x7f +#define NV_TYPE_FIRST NV_TYPE_INT8 +#define NV_TYPE_LAST NV_TYPE_STRING + +#define NV_ORDER_NETWORK 0x00 +#define NV_ORDER_HOST 0x80 + +#define NV_ORDER_MASK 0x80 + #define NV_MAGIC 0xaea1e struct nv { int nv_magic; diff --git a/sbin/hastd/nv.h b/sbin/hastd/nv.h index 16775480b026..7aee3c44aa96 100644 --- a/sbin/hastd/nv.h +++ b/sbin/hastd/nv.h @@ -41,33
+41,6 @@ #include -#define NV_TYPE_INT8 1 -#define NV_TYPE_UINT8 2 -#define NV_TYPE_INT16 3 -#define NV_TYPE_UINT16 4 -#define NV_TYPE_INT32 5 -#define NV_TYPE_UINT32 6 -#define NV_TYPE_INT64 7 -#define NV_TYPE_UINT64 8 -#define NV_TYPE_INT8_ARRAY 9 -#define NV_TYPE_UINT8_ARRAY 10 -#define NV_TYPE_INT16_ARRAY 11 -#define NV_TYPE_UINT16_ARRAY 12 -#define NV_TYPE_INT32_ARRAY 13 -#define NV_TYPE_UINT32_ARRAY 14 -#define NV_TYPE_INT64_ARRAY 15 -#define NV_TYPE_UINT64_ARRAY 16 -#define NV_TYPE_STRING 17 - -#define NV_TYPE_MASK 0x7f -#define NV_TYPE_FIRST NV_TYPE_INT8 -#define NV_TYPE_LAST NV_TYPE_STRING - -#define NV_ORDER_NETWORK 0x00 -#define NV_ORDER_HOST 0x80 - -#define NV_ORDER_MASK 0x80 - struct nv; struct nv *nv_alloc(void); From b9ffbb0a94fc0aac41c5956117b566d4b7b40c22 Mon Sep 17 00:00:00 2001 From: Pawel Jakub Dawidek Date: Sun, 24 Oct 2010 17:24:08 +0000 Subject: [PATCH 37/65] Implement nv_exists() function that returns true if argument of the given name exists. MFC after: 3 days --- sbin/hastd/nv.c | 28 +++++++++++++++++++++++++++- sbin/hastd/nv.h | 1 + 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/sbin/hastd/nv.c b/sbin/hastd/nv.c index a0de09adb0d5..aa37fcf2ec18 100644 --- a/sbin/hastd/nv.c +++ b/sbin/hastd/nv.c @@ -46,6 +46,8 @@ __FBSDID("$FreeBSD$"); #include #include +#define NV_TYPE_NONE 0 + #define NV_TYPE_INT8 1 #define NV_TYPE_UINT8 2 #define NV_TYPE_INT16 3 @@ -561,6 +563,29 @@ nv_get_string(struct nv *nv, const char *namefmt, ...) return (str); } +bool +nv_exists(struct nv *nv, const char *namefmt, ...) +{ + struct nvhdr *nvh; + va_list nameap; + int snverror, serrno; + + if (nv == NULL) + return (false); + + serrno = errno; + snverror = nv->nv_error; + + va_start(nameap, namefmt); + nvh = nv_find(nv, NV_TYPE_NONE, namefmt, nameap); + va_end(nameap); + + errno = serrno; + nv->nv_error = snverror; + + return (nvh != NULL); +} + /* * Dump content of the nv structure. 
*/ @@ -797,7 +822,8 @@ nv_find(struct nv *nv, int type, const char *namefmt, va_list nameap) assert(size >= NVH_SIZE(nvh)); nv_swap(nvh, true); if (strcmp(nvh->nvh_name, name) == 0) { - if ((nvh->nvh_type & NV_TYPE_MASK) != type) { + if (type != NV_TYPE_NONE && + (nvh->nvh_type & NV_TYPE_MASK) != type) { errno = EINVAL; if (nv->nv_error == 0) nv->nv_error = EINVAL; diff --git a/sbin/hastd/nv.h b/sbin/hastd/nv.h index 7aee3c44aa96..664557ec3441 100644 --- a/sbin/hastd/nv.h +++ b/sbin/hastd/nv.h @@ -126,6 +126,7 @@ const uint64_t *nv_get_uint64_array(struct nv *nv, size_t *sizep, const char *nv_get_string(struct nv *nv, const char *namefmt, ...) __printflike(2, 3); +bool nv_exists(struct nv *nv, const char *namefmt, ...) __printflike(2, 3); void nv_dump(struct nv *nv); #endif /* !_NV_H_ */ From ce837469ba69aeb842d9022c3d574f868140ea5e Mon Sep 17 00:00:00 2001 From: Pawel Jakub Dawidek Date: Sun, 24 Oct 2010 17:28:25 +0000 Subject: [PATCH 38/65] Before this change on first connect between primary and secondary we initialize all the data. This is a huge waste of time and resources if there were no writes yet, as there is no real data to synchronize. Optimize this by sending "virgin" argument to secondary, which gives it a hint that synchronization is not needed. In the common case (where both nodes are configured at the same time) instead of synchronizing everything, we don't synchronize at all.
MFC after: 1 week --- sbin/hastd/primary.c | 43 +++++++++++++++++++++++++++++++++++++++--- sbin/hastd/secondary.c | 13 +++++++++++-- 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/sbin/hastd/primary.c b/sbin/hastd/primary.c index 5fb903c90717..4c2f44f68031 100644 --- a/sbin/hastd/primary.c +++ b/sbin/hastd/primary.c @@ -417,6 +417,24 @@ init_environment(struct hast_resource *res __unused) } } +static bool +init_resuid(struct hast_resource *res) +{ + + mtx_lock(&metadata_lock); + if (res->hr_resuid != 0) { + mtx_unlock(&metadata_lock); + return (false); + } else { + /* Initialize unique resource identifier. */ + arc4random_buf(&res->hr_resuid, sizeof(res->hr_resuid)); + mtx_unlock(&metadata_lock); + if (metadata_write(res) < 0) + exit(EX_NOINPUT); + return (true); + } +} + static void init_local(struct hast_resource *res) { @@ -452,10 +470,12 @@ init_local(struct hast_resource *res) if (res->hr_resuid != 0) return; /* - * We're using provider for the first time, so we have to generate - * resource unique identifier and initialize local and remote counts. + * We're using provider for the first time. Initialize local and remote + * counters. We don't initialize resuid here, as we want to do it just + * in time. The reason for this is that we want to inform secondary + * that there were no writes yet, so there is no need to synchronize + * anything. */ - arc4random_buf(&res->hr_resuid, sizeof(res->hr_resuid)); res->hr_primary_localcnt = 1; res->hr_primary_remotecnt = 0; if (metadata_write(res) < 0) @@ -566,6 +586,19 @@ init_remote(struct hast_resource *res, struct proto_conn **inp, nv_add_string(nvout, res->hr_name, "resource"); nv_add_uint8_array(nvout, res->hr_token, sizeof(res->hr_token), "token"); + if (res->hr_resuid == 0) { + /* + * The resuid field was not yet initialized. 
+ * Because we do synchronization inside init_resuid(), it is + * possible that someone already initialized it, the function + * will return false then, but if we successfully initialized + * it, we will get true. True means that there were no writes + * to this resource yet and we want to inform secondary that + * synchronization is not needed by sending "virgin" argument. + */ + if (init_resuid(res)) + nv_add_int8(nvout, 1, "virgin"); + } nv_add_uint64(nvout, res->hr_resuid, "resuid"); nv_add_uint64(nvout, res->hr_primary_localcnt, "localcnt"); nv_add_uint64(nvout, res->hr_primary_remotecnt, "remotecnt"); @@ -1006,6 +1039,10 @@ ggate_recv_thread(void *arg) QUEUE_INSERT1(hio, send, ncomp); break; case BIO_WRITE: + if (res->hr_resuid == 0) { + /* This is first write, initialize resuid. */ + (void)init_resuid(res); + } for (;;) { mtx_lock(&range_lock); if (rangelock_islocked(range_sync, diff --git a/sbin/hastd/secondary.c b/sbin/hastd/secondary.c index 6d89f3559704..a722726f5e6c 100644 --- a/sbin/hastd/secondary.c +++ b/sbin/hastd/secondary.c @@ -243,13 +243,22 @@ init_remote(struct hast_resource *res, struct nv *nvin) */ if (res->hr_resuid == 0) { /* - * Provider is used for the first time. Initialize everything. + * Provider is used for the first time. If primary node done no + * writes yet as well (we will find "virgin" argument) then + * there is no need to synchronize anything. If primary node + * done any writes already we have to synchronize everything. */ assert(res->hr_secondary_localcnt == 0); res->hr_resuid = resuid; if (metadata_write(res) < 0) exit(EX_NOINPUT); - memset(map, 0xff, mapsize); + if (nv_exists(nvin, "virgin")) { + free(map); + map = NULL; + mapsize = 0; + } else { + memset(map, 0xff, mapsize); + } nv_add_uint8(nvout, HAST_SYNCSRC_PRIMARY, "syncsrc"); } else if ( /* Is primary is out-of-date? 
*/ From a4bd51a562109ecc91d08ae0da9afeb35774292f Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Sun, 24 Oct 2010 18:53:16 +0000 Subject: [PATCH 39/65] Make da driver to handle some probably broken Android devices, returning zero media and sector size instead of "Medium not present" error, until some confirmation button is tapped on device. --- sys/cam/scsi/scsi_da.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c index 7729ecc5ef54..b3b968cc8324 100644 --- a/sys/cam/scsi/scsi_da.c +++ b/sys/cam/scsi/scsi_da.c @@ -1667,7 +1667,10 @@ dadone(struct cam_periph *periph, union ccb *done_ccb) * give them an 'illegal' value we'll avoid that * here. */ - if (block_size >= MAXPHYS || block_size == 0) { + if (block_size == 0 && maxsector == 0) { + snprintf(announce_buf, sizeof(announce_buf), + "0MB (no media?)"); + } else if (block_size >= MAXPHYS || block_size == 0) { xpt_print(periph->path, "unsupportable block size %ju\n", (uintmax_t) block_size); From 7aaae327244daa15b9b692c8060aa85d56a85e33 Mon Sep 17 00:00:00 2001 From: Jilles Tjoelker Date: Sun, 24 Oct 2010 19:56:34 +0000 Subject: [PATCH 40/65] sh: Add a test trying to close a descriptor that is not open. In stable/8 and older, this fails. Some of the redirection changes in head have fixed it. --- tools/regression/bin/sh/execution/redir3.0 | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 tools/regression/bin/sh/execution/redir3.0 diff --git a/tools/regression/bin/sh/execution/redir3.0 b/tools/regression/bin/sh/execution/redir3.0 new file mode 100644 index 000000000000..d68e4504ed3d --- /dev/null +++ b/tools/regression/bin/sh/execution/redir3.0 @@ -0,0 +1,3 @@ +# $FreeBSD$ + +3>&- 3>&- From 3dec7d0c153b8079be4569f08d600f173ef39412 Mon Sep 17 00:00:00 2001 From: Jilles Tjoelker Date: Sun, 24 Oct 2010 20:09:49 +0000 Subject: [PATCH 41/65] sh: Check whether dup2 was successful for >&FD and <&FD. 
A failure (usually caused by FD not being open) is a redirection error. Exp-run done by: pav (with some other sh(1) changes) --- bin/sh/redir.c | 7 +++++-- tools/regression/bin/sh/execution/redir4.0 | 4 ++++ 2 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 tools/regression/bin/sh/execution/redir4.0 diff --git a/bin/sh/redir.c b/bin/sh/redir.c index f3ad3afd8e4e..4f0a40c87095 100644 --- a/bin/sh/redir.c +++ b/bin/sh/redir.c @@ -217,8 +217,11 @@ openredirect(union node *redir, char memory[10]) if (redir->ndup.dupfd >= 0) { /* if not ">&-" */ if (memory[redir->ndup.dupfd]) memory[fd] = 1; - else - dup2(redir->ndup.dupfd, fd); + else { + if (dup2(redir->ndup.dupfd, fd) < 0) + error("%d: %s", redir->ndup.dupfd, + strerror(errno)); + } } else { close(fd); } diff --git a/tools/regression/bin/sh/execution/redir4.0 b/tools/regression/bin/sh/execution/redir4.0 new file mode 100644 index 000000000000..57054c17c45f --- /dev/null +++ b/tools/regression/bin/sh/execution/redir4.0 @@ -0,0 +1,4 @@ +# $FreeBSD$ + +{ echo bad 0>&3; } 2>/dev/null 3>/dev/null 3>&- +exit 0 From 074e83b14e37eb3dc3be90618d2dbce97a56d5ad Mon Sep 17 00:00:00 2001 From: Jilles Tjoelker Date: Sun, 24 Oct 2010 20:45:13 +0000 Subject: [PATCH 42/65] sh: Make sure defined functions can actually be called. Add some conservative checks on function names: - Disallow expansions or quoting characters; these can only be called via strange control characters - Disallow '/'; these functions cannot be called anyway, as exec.c assumes they are pathnames - Make the CTL* bytes work properly in function names. These are syntax errors. POSIX does not require us to support more than names (letters, digits and underscores, not starting with a digit), but I do not want to restrict it that much at this time. 
Exp-run done by: pav (with some other sh(1) changes) --- bin/sh/parser.c | 10 +++++++--- tools/regression/bin/sh/parser/func1.0 | 25 +++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 tools/regression/bin/sh/parser/func1.0 diff --git a/bin/sh/parser.c b/bin/sh/parser.c index 6c504e5dfc1d..ec1510bf4d5b 100644 --- a/bin/sh/parser.c +++ b/bin/sh/parser.c @@ -639,10 +639,14 @@ simplecmd(union node **rpp, union node *redir) if (readtoken() != TRP) synexpect(TRP); funclinno = plinno; -#ifdef notdef - if (! goodname(n->narg.text)) + /* + * - Require plain text. + * - Functions with '/' cannot be called. + */ + if (!noexpand(n->narg.text) || quoteflag || + strchr(n->narg.text, '/')) synerror("Bad function name"); -#endif + rmescapes(n->narg.text); n->type = NDEFUN; n->narg.next = command(); funclinno = 0; diff --git a/tools/regression/bin/sh/parser/func1.0 b/tools/regression/bin/sh/parser/func1.0 new file mode 100644 index 000000000000..4e887b25f285 --- /dev/null +++ b/tools/regression/bin/sh/parser/func1.0 @@ -0,0 +1,25 @@ +# $FreeBSD$ +# POSIX does not require these bytes to work in function names, +# but making them all work seems a good goal. + +failures=0 +unset LC_ALL +export LC_CTYPE=en_US.ISO8859-1 +i=128 +set -f +while [ "$i" -le 255 ]; do + c=$(printf \\"$(printf %o "$i")") + ok=0 + eval "$c() { ok=1; }" + $c + ok1=$ok + ok=0 + "$c" + if [ "$ok" != 1 ] || [ "$ok1" != 1 ]; then + echo "Bad results for character $i" >&2 + : $((failures += 1)) + fi + unset -f $c + i=$((i+1)) +done +exit $((failures > 0)) From 427d3f3322a459e14c7130f491ec2e63633d9c10 Mon Sep 17 00:00:00 2001 From: Pyun YongHyeon Date: Sun, 24 Oct 2010 20:54:46 +0000 Subject: [PATCH 43/65] Use bge_chipid to compare controller ids. r214251 incorrectly used bge_chiprev. 
Reported by: Buganini gmail dot com > --- sys/dev/bge/if_bge.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sys/dev/bge/if_bge.c b/sys/dev/bge/if_bge.c index 92b39f3c0910..63c4c5dc5ae5 100644 --- a/sys/dev/bge/if_bge.c +++ b/sys/dev/bge/if_bge.c @@ -1695,9 +1695,9 @@ bge_blockinit(struct bge_softc *sc) /* Choose de-pipeline mode for BCM5906 A0, A1 and A2. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5906) { - if (sc->bge_chiprev == BGE_CHIPID_BCM5906_A0 || - sc->bge_chiprev == BGE_CHIPID_BCM5906_A1 || - sc->bge_chiprev == BGE_CHIPID_BCM5906_A2) + if (sc->bge_chipid == BGE_CHIPID_BCM5906_A0 || + sc->bge_chipid == BGE_CHIPID_BCM5906_A1 || + sc->bge_chipid == BGE_CHIPID_BCM5906_A2) CSR_WRITE_4(sc, BGE_ISO_PKT_TX, (CSR_READ_4(sc, BGE_ISO_PKT_TX) & ~3) | 2); } From 713ca255b8fb46c54b2e2005b898a3d9918921e6 Mon Sep 17 00:00:00 2001 From: Pyun YongHyeon Date: Sun, 24 Oct 2010 21:59:51 +0000 Subject: [PATCH 44/65] Add TSO support over VLAN for i82550/i82551. Controller requires VLAN hardware tagging to make TSO work over VLAN. So if VLAN hardware tagging is disabled explicitly clear TSO over VLAN. While I'm here allow disabling VLAN TX checksum offloading. 
Tested by: Liudas < liudasb <> centras dot lt > MFC after: 10 days --- sys/dev/fxp/if_fxp.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/sys/dev/fxp/if_fxp.c b/sys/dev/fxp/if_fxp.c index 2a221ebcbe69..5710ca77ca98 100644 --- a/sys/dev/fxp/if_fxp.c +++ b/sys/dev/fxp/if_fxp.c @@ -862,9 +862,9 @@ fxp_attach(device_t dev) ifp->if_capenable |= IFCAP_VLAN_MTU; /* the hw bits already set */ if ((sc->flags & FXP_FLAG_EXT_RFA) != 0) { ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | - IFCAP_VLAN_HWCSUM; + IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO; ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | - IFCAP_VLAN_HWCSUM; + IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO; } /* @@ -2866,10 +2866,19 @@ fxp_ioctl(struct ifnet *ifp, u_long command, caddr_t data) if (ifp->if_flags & IFF_UP) reinit++; } + if ((mask & IFCAP_VLAN_HWCSUM) != 0 && + (ifp->if_capabilities & IFCAP_VLAN_HWCSUM) != 0) + ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; + if ((mask & IFCAP_VLAN_HWTSO) != 0 && + (ifp->if_capabilities & IFCAP_VLAN_HWTSO) != 0) + ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if ((mask & IFCAP_VLAN_HWTAGGING) != 0 && (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING) != 0) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; - reinit++; + if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) + ifp->if_capenable &= + ~(IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM); + reinit++; } if (reinit > 0 && ifp->if_flags & IFF_UP) fxp_init_body(sc); From 0ef7c8a20b59b68adcc12d65df98f3836816f81c Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Sun, 24 Oct 2010 22:02:36 +0000 Subject: [PATCH 45/65] Add initial inet DDB support for show in_ifaddr and show sin commands which proved to be useful while debugging address list problems. 
MFC after: 6 days --- sys/conf/files | 1 + sys/netinet/in_debug.c | 120 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+) create mode 100644 sys/netinet/in_debug.c diff --git a/sys/conf/files b/sys/conf/files index 74f25c1c7c97..34477986ed2a 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -2559,6 +2559,7 @@ netinet/if_atm.c optional atm netinet/if_ether.c optional inet ether netinet/igmp.c optional inet netinet/in.c optional inet +netinet/in_debug.c optional inet ddb netinet/ip_carp.c optional inet carp | inet6 carp netinet/in_gif.c optional gif inet | netgraph_gif inet netinet/ip_gre.c optional gre inet diff --git a/sys/netinet/in_debug.c b/sys/netinet/in_debug.c new file mode 100644 index 000000000000..7624f1deedfc --- /dev/null +++ b/sys/netinet/in_debug.c @@ -0,0 +1,120 @@ +/*- + * Copyright (c) 2010 Bjoern A. Zeeb + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_ddb.h" + +#include +#include +#include + +#ifdef DDB +#include +#endif + +#include +#include + +#include +#include + +#ifdef DDB +static void +in_show_sockaddr_in(struct sockaddr_in *sin) +{ + +#define SIN_DB_RPINTF(f, e) db_printf("\t %s = " f "\n", #e, sin->e); + db_printf("\tsockaddr_in = %p\n", sin); + SIN_DB_RPINTF("%u", sin_len); + SIN_DB_RPINTF("%u", sin_family); + SIN_DB_RPINTF("%u", sin_port); + SIN_DB_RPINTF("0x%08x", sin_addr.s_addr); + db_printf("\t %s = %02x%02x%02x%02x%02x%02x%02x%02x\n", + "sin_zero[8]", + sin->sin_zero[0], sin->sin_zero[1], + sin->sin_zero[2], sin->sin_zero[3], + sin->sin_zero[4], sin->sin_zero[5], + sin->sin_zero[6], sin->sin_zero[7]); +#undef SIN_DB_RPINTF +} + +DB_SHOW_COMMAND(sin, db_show_sin) +{ + struct sockaddr_in *sin; + + sin = (struct sockaddr_in *)addr; + if (sin == NULL) { + /* usage: No need to confess if you didn't sin. 
*/ + db_printf("usage: show sin \n"); + return; + } + + in_show_sockaddr_in(sin); +} + +static void +in_show_in_ifaddr(struct in_ifaddr *ia) +{ + +#define IA_DB_RPINTF(f, e) db_printf("\t %s = " f "\n", #e, ia->e); +#define IA_DB_RPINTF_PTR(f, e) db_printf("\t %s = " f "\n", #e, &ia->e); +#define IA_DB_RPINTF_DPTR(f, e) db_printf("\t *%s = " f "\n", #e, *ia->e); + db_printf("\tin_ifaddr = %p\n", ia); + IA_DB_RPINTF_PTR("%p", ia_ifa); + IA_DB_RPINTF("0x%08lx", ia_net); + IA_DB_RPINTF("0x%08lx", ia_netmask); + IA_DB_RPINTF("0x%08lx", ia_subnet); + IA_DB_RPINTF("0x%08lx", ia_subnetmask); + IA_DB_RPINTF("0x%08x", ia_netbroadcast.s_addr); + IA_DB_RPINTF("%p", ia_hash.le_next); + IA_DB_RPINTF("%p", ia_hash.le_prev); + IA_DB_RPINTF_DPTR("%p", ia_hash.le_prev); + IA_DB_RPINTF("%p", ia_link.tqe_next); + IA_DB_RPINTF("%p", ia_link.tqe_prev); + IA_DB_RPINTF_DPTR("%p", ia_link.tqe_prev); + IA_DB_RPINTF_PTR("%p", ia_addr); + IA_DB_RPINTF_PTR("%p", ia_dstaddr); + IA_DB_RPINTF_PTR("%p", ia_sockmask); +#undef IA_DB_RPINTF_DPTR +#undef IA_DB_RPINTF_PTR +#undef IA_DB_RPINTF +} + +DB_SHOW_COMMAND(in_ifaddr, db_show_in_ifaddr) +{ + struct in_ifaddr *ia; + + ia = (struct in_ifaddr *)addr; + if (ia == NULL) { + db_printf("usage: show in_ifaddr \n"); + return; + } + + in_show_in_ifaddr(ia); +} +#endif From 67e109adbeca35c44f9b3f2e22e41b4db8903114 Mon Sep 17 00:00:00 2001 From: Jilles Tjoelker Date: Sun, 24 Oct 2010 22:03:21 +0000 Subject: [PATCH 46/65] sh: Do not allow overriding a special builtin with a function. This is a syntax error. POSIX does not say explicitly whether defining a function with the same name as a special builtin is allowed, but it does say that it is impossible to call such a function. A special builtin can still be overridden with an alias. This commit is part of a set of changes that will ensure that when something looks like a special builtin to the parser, it is one. 
(Not the other way around, as it remains possible to call a special builtin named by a variable or other substitution.) Exp-run done by: pav (with some other sh(1) changes) --- bin/sh/parser.c | 5 +++++ tools/regression/bin/sh/execution/func3.0 | 7 +++++++ 2 files changed, 12 insertions(+) create mode 100644 tools/regression/bin/sh/execution/func3.0 diff --git a/bin/sh/parser.c b/bin/sh/parser.c index ec1510bf4d5b..4c1b2726f789 100644 --- a/bin/sh/parser.c +++ b/bin/sh/parser.c @@ -57,6 +57,7 @@ __FBSDID("$FreeBSD$"); #include "alias.h" #include "show.h" #include "eval.h" +#include "exec.h" /* to check for special builtins */ #ifndef NO_HISTORY #include "myhistedit.h" #endif @@ -606,6 +607,7 @@ simplecmd(union node **rpp, union node *redir) union node *args, **app; union node **orig_rpp = rpp; union node *n = NULL; + int special; /* If we don't have any redirections already, then we must reset */ /* rpp to be the address of the local redir variable. */ @@ -647,6 +649,9 @@ simplecmd(union node **rpp, union node *redir) strchr(n->narg.text, '/')) synerror("Bad function name"); rmescapes(n->narg.text); + if (find_builtin(n->narg.text, &special) >= 0 && + special) + synerror("Cannot override a special builtin with a function"); n->type = NDEFUN; n->narg.next = command(); funclinno = 0; diff --git a/tools/regression/bin/sh/execution/func3.0 b/tools/regression/bin/sh/execution/func3.0 new file mode 100644 index 000000000000..f7a562a0baad --- /dev/null +++ b/tools/regression/bin/sh/execution/func3.0 @@ -0,0 +1,7 @@ +# $FreeBSD$ + +# This may fail when parsing or when defining the function, or the definition +# may silently do nothing. In no event may the function be executed. 
+ +sh -c 'unset() { echo overriding function executed, bad; }; v=1; unset v; exit "${v-0}"' 2>/dev/null +: From d94c86733927e85a8585df5d83f08fa10aedc01a Mon Sep 17 00:00:00 2001 From: Jilles Tjoelker Date: Sun, 24 Oct 2010 22:25:38 +0000 Subject: [PATCH 47/65] sh: Ignore double-quotes in arithmetic rather than treating them as quotes. This provides similar behaviour, but allows a simpler parser. This changes r206473. Exp-run done by: pav (with some other sh(1) changes) --- bin/sh/mksyntax.c | 3 ++- bin/sh/parser.c | 7 +++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bin/sh/mksyntax.c b/bin/sh/mksyntax.c index e63f0d68248f..e4c4d14dc17c 100644 --- a/bin/sh/mksyntax.c +++ b/bin/sh/mksyntax.c @@ -75,6 +75,7 @@ struct synclass synclass[] = { { "CEOF", "end of file" }, { "CCTL", "like CWORD, except it must be escaped" }, { "CSPCL", "these terminate a word" }, + { "CIGN", "character should be ignored" }, { NULL, NULL } }; @@ -232,7 +233,7 @@ main(int argc __unused, char **argv __unused) add("\n", "CNL"); add("\\", "CBACK"); add("`", "CBQUOTE"); - add("\"", "CDQUOTE"); + add("\"", "CIGN"); add("$", "CVAR"); add("}", "CENDVAR"); add("(", "CLP"); diff --git a/bin/sh/parser.c b/bin/sh/parser.c index 4c1b2726f789..9500d9145590 100644 --- a/bin/sh/parser.c +++ b/bin/sh/parser.c @@ -1224,10 +1224,7 @@ readtoken1(int firstc, char const *initialsyntax, char *eofmark, int striptabs) if (eofmark != NULL && newvarnest == 0) USTPUTC(c, out); else { - if (state[level].category == TSTATE_ARITH) - state[level].syntax = ARISYNTAX; - else - state[level].syntax = BASESYNTAX; + state[level].syntax = BASESYNTAX; quotef++; } break; @@ -1282,6 +1279,8 @@ readtoken1(int firstc, char const *initialsyntax, char *eofmark, int striptabs) break; case CEOF: goto endword; /* exit outer loop */ + case CIGN: + break; default: if (level == 0) goto endword; /* exit outer loop */ From fa212bfbe6d904d8cc6008a04c50307e8fbde6d6 Mon Sep 17 00:00:00 2001 From: Julian Elischer Date: Sun, 24 
Oct 2010 22:59:38 +0000 Subject: [PATCH 48/65] Add two scripts that demonstrate how to make and hook together jails using the virtual networking feature. Submitted by: Yavuz Gokirmak MFC after: 2 weeks --- share/examples/jails/README | 5 + share/examples/netgraph/virtual.chain | 373 ++++++++++++++++++++++++++ share/examples/netgraph/virtual.lan | 360 +++++++++++++++++++++++++ 3 files changed, 738 insertions(+) create mode 100644 share/examples/jails/README create mode 100644 share/examples/netgraph/virtual.chain create mode 100644 share/examples/netgraph/virtual.lan diff --git a/share/examples/jails/README b/share/examples/jails/README new file mode 100644 index 000000000000..f32ee260d3f3 --- /dev/null +++ b/share/examples/jails/README @@ -0,0 +1,5 @@ +# $FreeBSD$ + +See under share/examples/netgraph for some examples of +making and hooking together jails using netgraph as the +virtual networking fabric. diff --git a/share/examples/netgraph/virtual.chain b/share/examples/netgraph/virtual.chain new file mode 100644 index 000000000000..73a12d61072c --- /dev/null +++ b/share/examples/netgraph/virtual.chain @@ -0,0 +1,373 @@ +#!/bin/sh +# +# Copyright (c) 2010, Yavuz Gokirmak +# +# All rights reserved. +# +# This source code may be used, modified, copied, distributed, and +# sold, in both source and binary form provided that the above +# copyright and these terms are retained, verbatim, as the first +# lines of this file. Under no circumstances is the author +# responsible for the proper functioning of the software nor does +# the author assume any responsibility for damages incurred with +# its use. +# +# $FreeBSD$ +# +# +# This script creates and connects n router-like nodes. Complex wide +# area topologies can be created with the help of this script. +# +# Virtual nodes are generated via jails and network connections are +# established using ng_eiface(4) node types. +# +# To use this script: +# +# 0. Make your own copy of this example script. +# +# 1.
Edit the definition of ${TARGET_TOPOLOGY} to define your virtual +# nodes. Virtual topology definition includes node names and their +# IP address. Target top. syntax: ( name|ip<->name|ip ... ) +# Example 1: ( n1|10.0.2.1/30<->n2|10.0.2.2/30 ...) +# Example 2: ( n1|2001:b90::14a/125<->n1|2001:b90::14b/125 ...) +# +# 2. Run this script with "start" as the command line argument. +# +# 3. Add necessary static route commands for each virtual node. For +# example assume you have three virtual nodes connected to each other +# like a chain ( n1 is connected to n2, n2 is connected to n3 ). +# In order to establish connectivity among these virtual nodes, +# you have to add default routes to node n1 and node n3. Example +# static route command is: +# STATIC_ROUTE0="jexec n1 route add -inet default 10.0.2.2" +# STATIC_ROUTE1="jexec n3 route add -inet default 10.0.2.5" +# After defining default routes with above format you have to set +# the total number of static route commands as: +# STATIC_ROUTE_CNT=2 +# +# 4. Stop bridging by running this script with "stop" as the +# command line argument. +# +# 5. This script uses a temporary file in order to carry information +# between start and stop calls. +# In the start call, the netgraph interfaces and jails are created. +# At the stop phase, all created objects should be removed. +# DO NOT delete the temporary file between the start and stop phases.
+# +# Target Topology: +# +# +---------------+ +---------------------------------------------+ +# | n1 (vimage) | | n2 (vimage) | +# | | | | +# | +-----------+ | | +-----------+ +-----------+ +-----------+ | +# | | ngeth0 | | | | ngeth1 | | ngeth2 | | ngeth4 | | +# | |(ng_eiface)| | | |(ng_eiface)| |(ng_eiface)| |(ng_eiface)| | +# | +--+-----+--+ | | +--+-----+--+ +--+-----+--+ +--+-----+--+ | +# | |ether| | | |ether| |ether| |ether| | +# | +-X---+ | | +--X--+ +--X--+ +--X--+ | +# +-------X-------+ +------X--------------X---------------X-------+ +# X X X X +# X X X X +# XXXXXXXXXXXXXXX X X +# X X +# +--------X------+ +--------X------+ +# | -+--X--+- | | -+--X--+- | +# | |ether| | | |ether| | +# | +--+-----+--+ | | +--+-----+--+ | +# | | ngeth3 | | | | ngeth5 | | +# | |(ng_eiface)| | | |(ng_eiface)| | +# | +-----------+ | | +-----------+ | +# | | | | +# | n3 (vimage) | | n4 (vimage) | +# +---------------+ +---------------+ +# +# +# + +# List the names of virtual nodes and their IP addresses. Use ':' +# character to seperate node name from node IP address and netmask. + +TARGET_TOPOLOGY="n1|10.0.2.1/30<->n2|10.0.2.2/30 n2|10.0.2.5/30<->n3|10.0.2.6/30 n2|10.0.2.9/30<->n4|10.0.2.10/30" +STATIC_ROUTE0="jexec n1 route add -inet default 10.0.2.2" +STATIC_ROUTE1="jexec n3 route add -inet default 10.0.2.5" +STATIC_ROUTE2="jexec n4 route add -inet default 10.0.2.9" +STATIC_ROUTE_CNT=3 + +# MAC manifacturer prefix. This can be modified according to needs. +MAC_PREFIX="00:1d:92" + +# Temporary file is important for proper execution of script. +TEMP_FILE="/var/tmp/.virtual.chain.tmp" + +# Set root directory for jails to be created. +JAIL_PATH="/usr/jails/router" + + +#################################################################### +#### Nothing below this point should need to be modified. #### +#################################################################### + + +# Start/restart routine. +virtual_chain_start() { + + # Load netgraph KLD's as necessary. 
+ + for KLD in ng_ether ng_bridge ng_eiface; do + if ! kldstat -v | grep -qw ${KLD}; then + echo -n "Loading ${KLD}.ko... " + kldload ${KLD} || exit 1 + echo "done" + fi + done + + # Reset all interfaces and jails. If temporary file can not be found + # script assumes that there is no previous configuration. + + if [ ! -e ${TEMP_FILE} ]; then + echo "No previous configuration(${TEMP_FILE}) found to clean-up." + else + echo -n "Cleaning previous configuration..." + virtual_chain_stop + echo "done" + fi + + # Create temporary file for usage. This file includes generated + # interface names and jail names. All bridges, interfaces and jails + # are written to file while created. In clean-up process written + # objects are cleaned (i.e removed) from system. + + if [ -e ${TEMP_FILE} ]; then + touch ${TEMP_FILE} + fi + + + # Attach other interfaces as well. + for CONNECTION in ${TARGET_TOPOLOGY}; do + + # Virtual connections are defined in TARGET_TOPOLOGY variable. + # They have the form of 'nodeName|IPaddr'. Below two lines split + + PEER1=`echo ${CONNECTION} | awk -F"<->" '{print $1}'` + PEER1_NAME=`echo ${PEER1} | awk -F"|" '{print $1}'` + PEER1_IP=`echo ${PEER1} | awk -F"|" '{print $2}'` + + PEER2=`echo ${CONNECTION} | awk -F"<->" '{print $2}'` + PEER2_NAME=`echo ${PEER2} | awk -F"|" '{print $1}'` + PEER2_IP=`echo ${PEER2} | awk -F"|" '{print $2}'` + + # !!! if not created already.. + # Create virtual node (jail) with given name and using + # JAIL_PATH as root directory for jail. + + virtual_chain_create_peer_if_necessary ${PEER1_NAME} + virtual_chain_create_peer_if_necessary ${PEER2_NAME} + + # create an interface for peer with the given peer IP. Get interface + # for future use; you will connect this interface to the other + # peers' (PEER2) interface. + virtual_chain_create_interface_with_ip ${PEER1_NAME} ${PEER1_IP} + PEER1_INTERFACE=${RET_INTERFACE} + + # create an interface for peer with the given peer IP. 
Get interface + # for future use; you will connect this interface to the other + # peers' (PEER2) interface. + virtual_chain_create_interface_with_ip ${PEER2_NAME} ${PEER2_IP} + PEER2_INTERFACE=${RET_INTERFACE} + + # Connect virtual interface to other interface. Syntax is : + # ngctl connect INTERFACE1: INTERFACE2: ether ether. + + echo -n "Connecting ${PEER1_INTERFACE}:ether to ${PEER2_INTERFACE}:ether..." + ngctl connect ${PEER1_INTERFACE}: ${PEER2_INTERFACE}: ether ether \ + || exit 1 + echo "done" + + done + + # Executes static route add commands. + i=0 + while [ $i != $STATIC_ROUTE_CNT ]; do + eval ROUTE=\${STATIC_ROUTE${i}} + ret=`${ROUTE}` + i=`expr $i + 1` + done + + echo "Virtual WAN established succesfully!" +} + +virtual_chain_create_interface_with_ip() { + + NODE_NAME=$1 + NODE_IP=$2 + + # Create a ng_eiface object for virtual node. ng_eiface + # object has a hook that can be connected to one of bridge + # links. After creating interface get its automatically + # generated name for further usage. + + echo "Creating eiface interface for virtual node ${NODE_NAME}." + ngctl mkpeer eiface ether ether + EIFACE=`ngctl l | grep ngeth | tail -n 1| awk '{print $2}'` + echo "Interface ${EIFACE} is created." + + # Write name of the interface to temp file. Clean-up procedure + # will use this name to shutdown interface. + + echo "interface ${EIFACE}" >> ${TEMP_FILE} + + # Move virtual interface to virtual node. Note that Interface + # name will not be changed at the end of this movement. Moved + # interface can be seen at the output of ifconfig command in + # jail: 'jexec jailname ifconfig' + + echo "Moving ${EIFACE} to ${NODE_NAME}" + ifconfig ${EIFACE} vnet ${NODE_NAME} + + # Make lo0 interface localhost. + jexec ${NODE_NAME} ifconfig lo0 localhost + + # Generate a random mac address for virtual interface. First + # three octets can be changed by user. Last three octets are + # generated randomly. 
+ M4=`od -An -N2 -i /dev/random | sed -e 's/ //g' | \ + awk '{ print $1 % 256 }'` + M5=`od -An -N2 -i /dev/random | sed -e 's/ //g' | \ + awk '{ print $1 % 256 }'` + M6=`od -An -N2 -i /dev/random | sed -e 's/ //g' | \ + awk '{ print $1 % 256 }'` + + MAC=`printf ${MAC_PREFIX}:%02x:%02x:%02x ${M4} ${M5} ${M6}` + + # Set the link address (mac address) of virtual interface in + # virtual node to randomly generated MAC. + echo "Setting MAC address of ${EIFACE} to '${MAC}'" + jexec ${NODE_NAME} ifconfig ${EIFACE} link $MAC + + # Either IPv4 or IPv6 can be used in this script. Ifconfig + # IP setting syntax differs slightly for two IP versions. + # For version 4 'inet' keyword is used whereas for version 6 + # 'inet6' is used. Below line tries to decide which IP version + # is given and sets IPVER to 'inet' or 'inet6'. + + IPVER=`echo ${NODE_IP} | awk -F"." '{ split($4,last,"/"); \ + if( NF==4 && $1>0 && $1<256 && $2<256 && $3<256 && \ + last[1]<256) print "inet"; else print "inet6"}'` + + # Set IP address of virtual interface in virtual node. + echo "Setting IP address of ${EIFACE} to '${NODE_IP}'" + jexec ${NODE_NAME} ifconfig ${EIFACE} ${IPVER} ${NODE_IP} + + RET_INTERFACE=${EIFACE} +} + +virtual_chain_create_peer_if_necessary() { + + if ! grep -q $1 ${TEMP_FILE} ; then + + echo -n "Creating virtual node (jail) ${1}..." + jail -c vnet name=${1} host.hostname=${1} \ + path=${JAIL_PATH} persist + jexec ${1} sysctl -w net.inet.ip.forwarding=1 + jexec ${1} sysctl -w net.inet6.ip6.forwarding=1 + echo "done" + + # Write name of the jail to temp file. Clean-up + # procedure will use this name to remove jail. + + echo "node ${1}" >> ${TEMP_FILE} + fi + +} + + +# Stop routine. +virtual_chain_stop() { + + if [ ! -e ${TEMP_FILE} ]; then + echo "Nothing to stop! ${TEMP_FILE}: temp file not found" + else + + echo -n "Shutdown bridge interface.." 
+ OBJECTS=`cat ${TEMP_FILE} | grep bridge | awk '{print $2}'` + for BRIDGE in ${OBJECTS}; do + ngctl shutdown ${BRIDGE}: >/dev/null 2>&1 + done + echo "done" + + echo -n "Shutdown all eiface interfaces..." + OBJECTS=`cat ${TEMP_FILE} | grep interface | awk '{print $2}'` + for INTERFACE in ${OBJECTS}; do + ngctl shutdown ${INTERFACE}: >/dev/null 2>&1 + done + echo "done" + + echo -n "Removing all jails..." + OBJECTS=`cat ${TEMP_FILE} | grep node | awk '{print $2}'` + for NODE in ${OBJECTS}; do + jail -r ${NODE} + done + echo "done" + + echo "Removing tempfile ${TEMP_FILE}" + rm ${TEMP_FILE} + fi + echo "Virtual LAN objects removed succesfully!" + +} + +virtual_chain_usage() { + echo "usage: $0 start [target_topology]" + echo " : $0 [ stop | help ]" +} + + +# Main entry point. + + +case $# in + 1) + case $1 in + start) + echo -n "Creating default target topology:" + echo " ${TARGET_TOPOLOGY}" + virtual_chain_start + ;; + stop) + + if [ ! -e ${TEMP_FILE} ]; then + echo -n "Noting to stop! ${TEMP_FILE}:" + echo " temp file not found" + else + virtual_chain_stop + fi + ;; + help) + virtual_chain_usage + exit 1 + ;; + *) + virtual_chain_usage + exit 1 + + esac + ;; + 2) + case $1 in + start) + TARGET_TOPOLOGY=$2 + echo -n "Creating target topology:" + echo "${TARGET_TOPOLOGY}" + virtual_chain_start + ;; + *) + virtual_chain_usage + exit 1 + esac + ;; + + *) + virtual_chain_usage + exit 1 +esac + diff --git a/share/examples/netgraph/virtual.lan b/share/examples/netgraph/virtual.lan new file mode 100644 index 000000000000..bce33e1f9a2f --- /dev/null +++ b/share/examples/netgraph/virtual.lan @@ -0,0 +1,360 @@ +#!/bin/sh +# +# Copyright (c) 2010, Yavuz Gokirmak +# +# All rights reserved. +# +# This source code may be used, modified, copied, distributed, and +# sold, in both source and binary form provided that the above +# copyright and these terms are retained, verbatim, as the first +# lines of this file. 
Under no circumstances is the author +# responsible for the proper functioning of the software nor does +# the author assume any responsibility for damages incurred with +# its use. +# +# $FreeBSD$ + +# +# This script adds virtual nodes to one of the physical interfaces +# visible on your local area network (LAN). Virtual nodes seem real +# to external observers. +# If traceroute is executed to one of the virtual nodes, the IP +# address of the physical interface will not be seen in the output. +# Virtual nodes are generated via jails and network connections are +# established using ng_bridge(4) and ng_eiface(4) node types. +# +# To use this script: +# +# 0. Make your own copy of this example script. +# +# 1. Edit the definition of ${ETHER_INTF} as described below +# to define your real interface connected to the LAN. Virtual nodes +# will be placed on the same physical network as this interface. +# +# 2. Edit the definition of ${TARGET_TOPOLOGY} to define your virtual +# nodes. Virtual topology definition includes node names and their +# IP address. Target top. syntax: ( node1|ip1/24 node2|ip2/24 ... ) +# Example 1: ( n1|122.122.122.12/24, n2|122.122.122.13/24 ...) +# Example 2: ( n1|2001:b90::14a/125, n1|2001:b90::14b/125 ...) +# +# 3. Run this script with "start" as the command line argument. +# +# 4. Stop bridging by running this script with "stop" as the +# command line argument. +# +# 5. This script uses a temporary file in order to carry information +# between start and stop calls. +# In the start call, the netgraph interfaces and jails are created. +# At the stop phase, all created objects should be removed. +# DO NOT delete the temporary file between the start and stop phases. +# +# To add virtual nodes for multiple independent LANs, create multiple +# copies of this script with different variable definitions.
+# +# Target Topology: +# +# +# +---------------+ +---------------+ +---------------+ +# | n0 (vimage) | | n1 (vimage) | | nk (vimage) | +# | | | | | | +# | +-----------+ | | +-----------+ | | +-----------+ | +# | | ngeth0 | | | | ngeth1 | | | | ngethk | | +# | |(ng_eiface)| | | |(ng_eiface)| | | |(ng_eiface)| | +# | +--+-----+--+ | | +--+-----+--+ | | +--+-----+--+ | +# | |ether| | | |ether| | | |ether| | +# | +--X--+ | | +--X--+ | | +---X-+ | +# +-----+ +--------\------+ +--------\------+ +-------/-------+ +# |upper|----\ \ip_addr \ip_addr /ip_addr +# +-+-----+--+ \ \ \ \ +# | em0 | \ +--------+ +-+ \ +# |(ng_ether)| +-----------+ \ \ \ +# +-+-----+--+ \ \ / \ +# |lower| +---------\ \ \ / / +# +--X--+ / O--X--O O-X---O O---X-O O--X--O O---X---O +# \ | |link0| |link1| |link2| |link3| |linkk+2| +# \ / +-O-----O-O-----O-O-----O-O-----O-----O-------O-+ +# +---+ | | +# | bridge (ng_bridge) | +# +-----------------------------------------------+ +# +# + +# Give the name of ethernet interface. Virtual nodes will be seen as +# local neighbours of this interface. + +ETHER_INTF="em0" + +# List the names of virtual nodes and their IP addresses. Use ':' +# character to seperate node name from node IP address and netmask. + +TARGET_TOPOLOGY="c1|10.0.2.20/24 c2|10.0.2.21/24 c3|10.0.2.22/24" + +# MAC manifacturer prefix. This can be modified according to needs. +MAC_PREFIX="00:1d:92" + +# Temporary file is important for proper execution of script. +TEMP_FILE="/var/tmp/.virtual.lan.tmp" + +# Set root directory for jails to be created. +JAIL_PATH="/usr/jails/node" + + +#################################################################### +#### Nothing below this point should need to be modified. #### +#################################################################### + + +# Start/restart routine. +virtual_lan_start() { + + # Load netgraph KLD's as necessary. + + for KLD in ng_ether ng_bridge ng_eiface; do + if ! kldstat -v | grep -qw ${KLD}; then + echo -n "Loading ${KLD}.ko... 
" + kldload ${KLD} || exit 1 + echo "done" + fi + done + + # Reset all interfaces and jails. If temporary file can not be found + # script assumes that there is no previous configuration. + + if [ ! -e ${TEMP_FILE} ]; then + echo "No previous configuration(${TEMP_FILE}) found to clean-up." + else + echo -n "Cleaning previous configuration..." + virtual_lan_stop + echo "done" + fi + + # Create temporary file for usage. This file includes generated + # interface names and jail names. All bridges, interfaces and jails + # are written to file while created. In clean-up process written + # objects are cleaned (i.e removed) from system. + + if [ -e ${TEMP_FILE} ]; then + touch ${TEMP_FILE} + fi + + echo -n "Verifying ethernet interface existence..." + # Verify ethernet interface exist. + if ! ngctl info ${ETHER_INTF}: >/dev/null 2>&1; then + echo "Error: interface ${ETHER_INTF} does not exist" + exit 1 + fi + ifconfig ${ETHER_INTF} up || exit 1 + echo "done" + + # Get current number of bridge interfaces in the system. This number + # is used to create a name for new bridge. + BRIDGE_COUNT=`ngctl l | grep bridge | wc -l | sed -e "s/ //g"` + BRIDGE_NAME="bridge${BRIDGE_COUNT}" + + # Create new ng_bridge(4) node and attach it to the ethernet interface. + # Connect ng_ether:lower hook to bridge:link0 when creating bridge and + # connect ng_ether:upper hook to bridge:link1 after bridge name is set. + + echo "Creating bridge interface: ${BRIDGE_NAME}..." + ngctl mkpeer ${ETHER_INTF}: bridge lower link0 || exit 1 + ngctl name ${ETHER_INTF}:lower ${BRIDGE_NAME} || exit 1 + ngctl connect ${ETHER_INTF}: ${BRIDGE_NAME}: upper link1 || exit 1 + echo "Bridge ${BRIDGE_NAME} is created and ${ETHER_INTF} is connected." + + # In the above code block two hooks are connected to bridge interface, + # therefore LINKNUM is set to 2 indicating total number of connected + # hooks on the bridge interface. + LINKNUM=2 + + # Write name of the bridge to temp file. 
Clean-up procedure will use + # this name to shutdown bridge interface. + echo "bridge ${BRIDGE_NAME}" > ${TEMP_FILE} + + + # Attach other interfaces as well. + for NODE in ${TARGET_TOPOLOGY}; do + + # Virtual nodes are defined in TARGET_TOPOLOGY variable. They + # have the form of 'nodeName|IPaddr'. Below two lines split + # node definition to get node name and node IP. + + NODE_NAME=`echo ${NODE} | awk -F"|" '{print $1}'` + NODE_IP=`echo ${NODE} | awk -F"|" '{print $2}'` + + # Create virtual node (jail) with given name and using + # JAIL_PATH as root directory for jail. + + echo -n "Creating virtual node (jail) ${NODE_NAME}..." + jail -c vnet name=${NODE_NAME} host.hostname=${NODE_NAME} \ + path=${JAIL_PATH} persist + echo "done" + + # Write name of the jail to temp file. Clean-up procedure will + # use this name to remove jail. + + echo "node ${NODE_NAME}" >> ${TEMP_FILE} + + # Create a ng_eiface object for virtual node. ng_eiface + # object has a hook that can be connected to one of bridge + # links. After creating interface get its automatically + # generated name for further usage. + + echo "Creating eiface interface for virtual node ${NODE_NAME}." + ngctl mkpeer eiface ether ether + EIFACE=`ngctl l | grep ngeth | tail -n 1| awk '{print $2}'` + echo "Interface ${EIFACE} is created." + + # Write name of the interface to temp file. Clean-up procedure + # will use this name to shutdown interface. + + echo "interface ${EIFACE}" >> ${TEMP_FILE} + + # Move virtual interface to virtual node. Note that Interface + # name will not be changed at the end of this movement. Moved + # interface can be seen at the output of ifconfig command in + # jail: 'jexec jailname ifconfig' + + echo "Moving ${EIFACE} to ${NODE_NAME}" + ifconfig ${EIFACE} vnet ${NODE_NAME} + + # Make lo0 interface localhost. + jexec ${NODE_NAME} ifconfig lo0 localhost + + # Generate a random mac address for virtual interface. First + # three octets can be changed by user. 
Last three octets are + # generated randomly. + M4=`od -An -N2 -i /dev/random | sed -e 's/ //g' | \ + awk '{ print $1 % 256 }'` + M5=`od -An -N2 -i /dev/random | sed -e 's/ //g' | \ + awk '{ print $1 % 256 }'` + M6=`od -An -N2 -i /dev/random | sed -e 's/ //g' | \ + awk '{ print $1 % 256 }'` + + MAC=`printf ${MAC_PREFIX}:%02x:%02x:%02x ${M4} ${M5} ${M6}` + + # Set the link address (mac address) of virtual interface in + # virtual node to randomly generated MAC. + echo "Setting MAC address of ${EIFACE} to '${MAC}'" + jexec ${NODE_NAME} ifconfig ${EIFACE} link $MAC + + # Either IPv4 or IPv6 can be used in this script. Ifconfig + # IP setting syntax differs slightly for two IP versions. + # For version 4 'inet' keyword is used whereas for version 6 + # 'inet6' is used. Below line tries to decide which IP version + # is given and sets IPVER to 'inet' or 'inet6'. + + IPVER=`echo ${NODE_IP} | awk -F"." '{ split($4,last,"/"); \ + if( NF==4 && $1>0 && $1<256 && $2<256 && $3<256 && \ + last[1]<256) print "inet"; else print "inet6"}'` + + # Set IP address of virtual interface in virtual node. + echo "Setting IP address of ${EIFACE} to '${NODE_IP}'" + jexec ${NODE_NAME} ifconfig ${EIFACE} ${IPVER} ${NODE_IP} + + # Connect virtual interface to bridge interface. Syntax is : + # ngctl connect INTERFACE: BRIDGE: INTERFACE_HOOK EMPTY_LINK. + # Interface has one hook named 'ether' and below line connects + # ether hook to bridge's first unconnected link. + + echo -n "Connecting ${EIFACE}:ether to ${BRIDGE_NAME}:link${LINKNUM}..." + ngctl connect ${EIFACE}: ${BRIDGE_NAME}: ether link${LINKNUM} \ + || exit 1 + echo "done" + + # Now, bridge has one more connected link thus link count is + # incremented. + LINKNUM=`expr ${LINKNUM} + 1` + done + echo "Virtual LAN established succesfully!" + +} + +# Stop routine. +virtual_lan_stop() { + + if [ ! -e ${TEMP_FILE} ]; then + echo "Nothing to stop! ${TEMP_FILE}: temp file not found" + else + + echo -n "Shutdown bridge interface.." 
+ OBJECTS=`cat ${TEMP_FILE} | grep bridge | awk '{print $2}'` + for BRIDGE in ${OBJECTS}; do + ngctl shutdown ${BRIDGE}: >/dev/null 2>&1 + done + echo "done" + + echo -n "Shutdown all eiface interfaces..." + OBJECTS=`cat ${TEMP_FILE} | grep interface | awk '{print $2}'` + for INTERFACE in ${OBJECTS}; do + ngctl shutdown ${INTERFACE}: >/dev/null 2>&1 + done + echo "done" + + echo -n "Removing all jails..." + OBJECTS=`cat ${TEMP_FILE} | grep node | awk '{print $2}'` + for NODE in ${OBJECTS}; do + jail -r ${NODE} + done + echo "done" + + echo "Removing tempfile ${TEMP_FILE}" + rm ${TEMP_FILE} + fi + echo "Virtual LAN objects removed succesfully!" + +} + +virtual_lan_usage() { + echo "usage: $0 start [target_topology]" + echo " : $0 [ stop | help ]" +} + + +# Main entry point. + +case $# in + 1) + case $1 in + start) + echo -n "Creating default target topology:" + echo " ${TARGET_TOPOLOGY}" + virtual_lan_start + ;; + stop) + + if [ ! -e ${TEMP_FILE} ]; then + echo -n "Noting to stop! ${TEMP_FILE}:" + echo " temp file not found" + else + virtual_lan_stop + fi + ;; + help) + virtual_lan_usage + exit 1 + ;; + *) + virtual_lan_usage + exit 1 + + esac + ;; + 2) + case $1 in + start) + TARGET_TOPOLOGY=$2 + echo -n "Creating target topology:" + echo "${TARGET_TOPOLOGY}" + virtual_lan_start + ;; + *) + virtual_lan_usage + exit 1 + esac + ;; + + *) + virtual_lan_usage + exit 1 +esac + From 136347a1472c278831a867096f5d94f56185b512 Mon Sep 17 00:00:00 2001 From: Julian Elischer Date: Sun, 24 Oct 2010 23:19:11 +0000 Subject: [PATCH 49/65] Add a jails directory in the examples section MFC after: 2 weeks --- etc/mtree/BSD.usr.dist | 2 ++ 1 file changed, 2 insertions(+) diff --git a/etc/mtree/BSD.usr.dist b/etc/mtree/BSD.usr.dist index 1443ada8923c..64be70b0678d 100644 --- a/etc/mtree/BSD.usr.dist +++ b/etc/mtree/BSD.usr.dist @@ -227,6 +227,8 @@ .. iscsi .. + jails + .. 
kld cdev module From 3082dd3fc7372032d322b738439a633f1056c8b8 Mon Sep 17 00:00:00 2001 From: Julian Elischer Date: Sun, 24 Oct 2010 23:25:14 +0000 Subject: [PATCH 50/65] Add Makefile entry to install two new example files. MFC after: 2 weeks --- share/examples/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/share/examples/Makefile b/share/examples/Makefile index 7d011e3a9cde..6a510a77fee2 100644 --- a/share/examples/Makefile +++ b/share/examples/Makefile @@ -17,6 +17,7 @@ LDIRS= BSD_daemon \ ibcs2 \ indent \ ipfw \ + jails \ kld \ libvgl \ mdoc \ @@ -80,6 +81,7 @@ XFILES= BSD_daemon/FreeBSD.pfa \ ibcs2/hello.uu \ indent/indent.pro \ ipfw/change_rules.sh \ + jails/README \ kld/Makefile \ kld/cdev/Makefile \ kld/cdev/README \ @@ -116,6 +118,8 @@ XFILES= BSD_daemon/FreeBSD.pfa \ netgraph/ngctl \ netgraph/raw \ netgraph/udp.tunnel \ + netgraph/virtual.chain \ + netgraph/virtual.lan \ nwclient/dot.nwfsrc \ nwclient/nwfs.sh.sample \ perfmon/Makefile \ From 068e4f6f7a96ab9425395133fc3c679d14c95e56 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Mon, 25 Oct 2010 07:17:07 +0000 Subject: [PATCH 51/65] Do some whitespace and `identify` output cleanup. 
Submitted by: arundel MFC after: 2 weeks --- sbin/camcontrol/camcontrol.c | 107 +++++++++++++++++++---------------- 1 file changed, 57 insertions(+), 50 deletions(-) diff --git a/sbin/camcontrol/camcontrol.c b/sbin/camcontrol/camcontrol.c index b92cc4c183dc..9f16f2b69a79 100644 --- a/sbin/camcontrol/camcontrol.c +++ b/sbin/camcontrol/camcontrol.c @@ -116,7 +116,7 @@ typedef enum { } cam_argmask; struct camcontrol_opts { - const char *optname; + const char *optname; cam_cmdmask cmdnum; cam_argmask argnum; const char *subopt; @@ -204,7 +204,7 @@ static int readdefects(struct cam_device *device, int argc, char **argv, char *combinedopt, int retry_count, int timeout); static void modepage(struct cam_device *device, int argc, char **argv, char *combinedopt, int retry_count, int timeout); -static int scsicmd(struct cam_device *device, int argc, char **argv, +static int scsicmd(struct cam_device *device, int argc, char **argv, char *combinedopt, int retry_count, int timeout); static int tagcontrol(struct cam_device *device, int argc, char **argv, char *combinedopt); @@ -234,7 +234,7 @@ static int atapm(struct cam_device *device, int argc, char **argv, #endif camcontrol_optret -getoption(char *arg, cam_cmdmask *cmdnum, cam_argmask *argnum, +getoption(char *arg, cam_cmdmask *cmdnum, cam_argmask *argnum, const char **subopt) { struct camcontrol_opts *opts; @@ -622,7 +622,7 @@ scsistart(struct cam_device *device, int startstop, int loadeject, else fprintf(stdout, "Error received from stop unit command\n"); - + if (arglist & CAM_ARG_VERBOSE) { cam_error_print(device, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr); @@ -688,7 +688,7 @@ scsiinquiry(struct cam_device *device, int retry_count, int timeout) union ccb *ccb; struct scsi_inquiry_data *inq_buf; int error = 0; - + ccb = cam_getccb(device); if (ccb == NULL) { @@ -721,13 +721,13 @@ scsiinquiry(struct cam_device *device, int retry_count, int timeout) * scsi_inquiry() will convert an inq_len (which is passed in as * a u_int32_t, but 
the field in the CDB is only 1 byte) of 256 * to 0. Evidently, very few devices meet the spec in that - * regard. Some devices, like many Seagate disks, take the 0 as + * regard. Some devices, like many Seagate disks, take the 0 as * 0, and don't return any data. One Pioneer DVD-R drive * returns more data than the command asked for. * * So, since there are numerous devices that just don't work * right with the full inquiry size, we don't send the full size. - * + * * - The second reason not to use the full inquiry data length is * that we don't need it here. The only reason we issue a * standard inquiry is to get the vendor name, device name, @@ -1181,7 +1181,7 @@ atacapprint(struct ata_params *parm) } printf("\nFeature " - "Support Enable Value Vendor\n"); + "Support Enabled Value Vendor\n"); printf("read ahead %s %s\n", parm->support.command1 & ATA_SUPPORT_LOOKAHEAD ? "yes" : "no", parm->enabled.command1 & ATA_SUPPORT_LOOKAHEAD ? "yes" : "no"); @@ -1201,16 +1201,13 @@ atacapprint(struct ata_params *parm) ATA_QUEUE_LEN(parm->queue) + 1); } else printf("\n"); - if (parm->satacapabilities && parm->satacapabilities != 0xffff) { - printf("Native Command Queuing (NCQ) %s ", - parm->satacapabilities & ATA_SUPPORT_NCQ ? - "yes" : "no"); - if (parm->satacapabilities & ATA_SUPPORT_NCQ) { - printf(" %d tags\n", - ATA_QUEUE_LEN(parm->queue) + 1); - } else - printf("\n"); - } + printf("Native Command Queuing (NCQ) "); + if (parm->satacapabilities != 0xffff && + (parm->satacapabilities & ATA_SUPPORT_NCQ)) { + printf("yes %d tags\n", + ATA_QUEUE_LEN(parm->queue) + 1); + } else + printf("no\n"); printf("SMART %s %s\n", parm->support.command1 & ATA_SUPPORT_SMART ? "yes" : "no", parm->enabled.command1 & ATA_SUPPORT_SMART ? "yes" : "no"); @@ -1223,28 +1220,39 @@ atacapprint(struct ata_params *parm) printf("power management %s %s\n", parm->support.command1 & ATA_SUPPORT_POWERMGT ? "yes" : "no", parm->enabled.command1 & ATA_SUPPORT_POWERMGT ? 
"yes" : "no"); - printf("advanced power management %s %s %d/0x%02X\n", + printf("advanced power management %s %s", parm->support.command2 & ATA_SUPPORT_APM ? "yes" : "no", - parm->enabled.command2 & ATA_SUPPORT_APM ? "yes" : "no", - parm->apm_value, parm->apm_value); - printf("automatic acoustic management %s %s " - "%d/0x%02X %d/0x%02X\n", + parm->enabled.command2 & ATA_SUPPORT_APM ? "yes" : "no"); + if (parm->support.command2 & ATA_SUPPORT_APM) { + printf(" %d/0x%02X\n", + parm->apm_value, parm->apm_value); + } else + printf("\n"); + printf("automatic acoustic management %s %s", parm->support.command2 & ATA_SUPPORT_AUTOACOUSTIC ? "yes" :"no", - parm->enabled.command2 & ATA_SUPPORT_AUTOACOUSTIC ? "yes" :"no", - ATA_ACOUSTIC_CURRENT(parm->acoustic), - ATA_ACOUSTIC_CURRENT(parm->acoustic), - ATA_ACOUSTIC_VENDOR(parm->acoustic), - ATA_ACOUSTIC_VENDOR(parm->acoustic)); + parm->enabled.command2 & ATA_SUPPORT_AUTOACOUSTIC ? "yes" :"no"); + if (parm->support.command2 & ATA_SUPPORT_AUTOACOUSTIC) { + printf(" %d/0x%02X %d/0x%02X\n", + ATA_ACOUSTIC_CURRENT(parm->acoustic), + ATA_ACOUSTIC_CURRENT(parm->acoustic), + ATA_ACOUSTIC_VENDOR(parm->acoustic), + ATA_ACOUSTIC_VENDOR(parm->acoustic)); + } else + printf("\n"); printf("media status notification %s %s\n", parm->support.command2 & ATA_SUPPORT_NOTIFY ? "yes" : "no", parm->enabled.command2 & ATA_SUPPORT_NOTIFY ? "yes" : "no"); printf("power-up in Standby %s %s\n", parm->support.command2 & ATA_SUPPORT_STANDBY ? "yes" : "no", parm->enabled.command2 & ATA_SUPPORT_STANDBY ? "yes" : "no"); - printf("write-read-verify %s %s %d/0x%x\n", + printf("write-read-verify %s %s", parm->support2 & ATA_SUPPORT_WRITEREADVERIFY ? "yes" : "no", - parm->enabled2 & ATA_SUPPORT_WRITEREADVERIFY ? "yes" : "no", - parm->wrv_mode, parm->wrv_mode); + parm->enabled2 & ATA_SUPPORT_WRITEREADVERIFY ? 
"yes" : "no"); + if (parm->support2 & ATA_SUPPORT_WRITEREADVERIFY) { + printf(" %d/0x%x\n", + parm->wrv_mode, parm->wrv_mode); + } else + printf("\n"); printf("unload %s %s\n", parm->support.extension & ATA_SUPPORT_UNLOAD ? "yes" : "no", parm->enabled.extension & ATA_SUPPORT_UNLOAD ? "yes" : "no"); @@ -1255,7 +1263,6 @@ atacapprint(struct ata_params *parm) parm->support_dsm & ATA_SUPPORT_DSM_TRIM ? "yes" : "no"); } - static int ataidentify(struct cam_device *device, int retry_count, int timeout) { @@ -1902,7 +1909,7 @@ readdefects(struct cam_device *device, int argc, char **argv, /* * XXX KDM I should probably clean up the printout format for the - * disk defects. + * disk defects. */ switch (returned_format & SRDDH10_DLIST_FORMAT_MASK){ case SRDDH10_PHYSICAL_SECTOR_FORMAT: @@ -2011,7 +2018,7 @@ void reassignblocks(struct cam_device *device, u_int32_t *blocks, int num_blocks) { union ccb *ccb; - + ccb = cam_getccb(device); cam_freeccb(ccb); @@ -2114,7 +2121,7 @@ mode_select(struct cam_device *device, int save_pages, int retry_count, err(1, "error sending mode select command"); else errx(1, "error sending mode select command"); - + } cam_freeccb(ccb); @@ -2294,7 +2301,7 @@ scsicmd(struct cam_device *device, int argc, char **argv, char *combinedopt, if (arglist & CAM_ARG_CMD_IN) { warnx("command must either be " "read or write, not both"); - error = 1; + error = 1; goto scsicmd_bailout; } arglist |= CAM_ARG_CMD_OUT; @@ -2611,7 +2618,7 @@ camdebug(int argc, char **argv, char *combinedopt) warnx("bus:target, or bus:target:lun to debug"); } } - + if (error == 0) { ccb.ccb_h.func_code = XPT_DEBUG; @@ -2874,7 +2881,7 @@ cts_print(struct cam_device *device, struct ccb_trans_settings *cts) } /* - * Get a path inquiry CCB for the specified device. + * Get a path inquiry CCB for the specified device. 
*/ static int get_cpi(struct cam_device *device, struct ccb_pathinq *cpi) @@ -2913,7 +2920,7 @@ get_cpi(struct cam_device *device, struct ccb_pathinq *cpi) } /* - * Get a get device CCB for the specified device. + * Get a get device CCB for the specified device. */ static int get_cgd(struct cam_device *device, struct ccb_getdev *cgd) @@ -3764,9 +3771,9 @@ scsiformat(struct cam_device *device, int argc, char **argv, fprintf(stdout, "\rFormatting: %ju.%02u %% " "(%d/%d) done", - (uintmax_t)(percentage / + (uintmax_t)(percentage / (0x10000 * 100)), - (unsigned)((percentage / + (unsigned)((percentage / 0x10000) % 100), val, 0x10000); fflush(stdout); @@ -3956,7 +3963,7 @@ scsireportluns(struct cam_device *device, int argc, char **argv, case RPL_LUNDATA_ATYP_PERIPH: if ((lundata->luns[i].lundata[j] & RPL_LUNDATA_PERIPH_BUS_MASK) != 0) - fprintf(stdout, "%d:", + fprintf(stdout, "%d:", lundata->luns[i].lundata[j] & RPL_LUNDATA_PERIPH_BUS_MASK); else if ((j == 0) @@ -3994,7 +4001,7 @@ scsireportluns(struct cam_device *device, int argc, char **argv, field_len_code = (lundata->luns[i].lundata[j] & RPL_LUNDATA_EXT_LEN_MASK) >> 4; field_len = field_len_code * 2; - + if ((eam_code == RPL_LUNDATA_EXT_EAM_WK) && (field_len_code == 0x00)) { fprintf(stdout, "%d", @@ -4352,7 +4359,7 @@ atapm(struct cam_device *device, int argc, char **argv, #endif /* MINIMALISTIC */ -void +void usage(int verbose) { fprintf(verbose ? stdout : stderr, @@ -4494,7 +4501,7 @@ usage(int verbose) #endif /* MINIMALISTIC */ } -int +int main(int argc, char **argv) { int c; @@ -4544,7 +4551,7 @@ main(int argc, char **argv) * this. getopt is kinda braindead, so you end up having to run * through the options twice, and give each invocation of getopt * the option string for the other invocation. - * + * * You would think that you could just have two groups of options. 
* The first group would get parsed by the first invocation of * getopt, and the second group would get parsed by the second @@ -4553,13 +4560,13 @@ main(int argc, char **argv) * to the argument _after_ the first argument in the second group. * So when the second invocation of getopt comes around, it doesn't * recognize the first argument it gets and then bails out. - * + * * A nice alternative would be to have a flag for getopt that says * "just keep parsing arguments even when you encounter an unknown * argument", but there isn't one. So there's no real clean way to * easily parse two sets of arguments without having one invocation * of getopt know about the other. - * + * * Without this hack, the first invocation of getopt would work as * long as the generic arguments are first, but the second invocation * (in the subfunction) would fail in one of two ways. In the case @@ -4573,14 +4580,14 @@ main(int argc, char **argv) * whether optind had been incremented one option too far. The * mechanics of that, however, are more daunting than just giving * both invocations all of the expect options for either invocation. - * + * * Needless to say, I wouldn't mind if someone invented a better * (non-GPL!) command line parsing interface than getopt. I * wouldn't mind if someone added more knobs to getopt to make it * work better. Who knows, I may talk myself into doing it someday, * if the standards weenies let me. As it is, it just leads to * hackery like this and causes people to avoid it in some cases. 
- * + * * KDM, September 8th, 1998 */ if (subopt != NULL) From 130a0815631130700a1a02ab7298254dc881e864 Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Mon, 25 Oct 2010 07:39:05 +0000 Subject: [PATCH 52/65] add dates along with revision numbers in UPDATING entry for 20100915 --- UPDATING | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/UPDATING b/UPDATING index 6ec978011e57..aa8e5906b04d 100644 --- a/UPDATING +++ b/UPDATING @@ -35,10 +35,10 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 9.x IS SLOW: 20100915: A workaround for a fixed ld bug has been removed in kernel code, so make sure that your system ld is built from sources after - revision 210245 (r211583 if building head kernel on stable/8, - r211584 for stable/7). A symptom of incorrect ld version is - different addresses for set_pcpu section and __start_set_pcpu - symbol in kernel and/or modules. + revision 210245 from 2010-07-19 (r211583 if building head kernel + on stable/8, r211584 for stable/7; both from 2010-08-21). + A symptom of incorrect ld version is different addresses for + set_pcpu section and __start_set_pcpu symbol in kernel and/or modules. 20100913: The $ipv6_prefer variable in rc.conf(5) has been split into From 5b9392e840d9dcfc112c8c2ea125efb9bfb01ef4 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Mon, 25 Oct 2010 07:41:21 +0000 Subject: [PATCH 53/65] Add missing mtx_destroy() on channel attach failure. 
--- sys/dev/ahci/ahci.c | 1 + sys/dev/siis/siis.c | 1 + 2 files changed, 2 insertions(+) diff --git a/sys/dev/ahci/ahci.c b/sys/dev/ahci/ahci.c index d6357b2edcb0..314bd91d1198 100644 --- a/sys/dev/ahci/ahci.c +++ b/sys/dev/ahci/ahci.c @@ -969,6 +969,7 @@ ahci_ch_attach(device_t dev) err0: bus_release_resource(dev, SYS_RES_MEMORY, ch->unit, ch->r_mem); mtx_unlock(&ch->mtx); + mtx_destroy(&ch->mtx); return (error); } diff --git a/sys/dev/siis/siis.c b/sys/dev/siis/siis.c index 0aef7757114e..ba10c92c4696 100644 --- a/sys/dev/siis/siis.c +++ b/sys/dev/siis/siis.c @@ -527,6 +527,7 @@ siis_ch_attach(device_t dev) err0: bus_release_resource(dev, SYS_RES_MEMORY, ch->unit, ch->r_mem); mtx_unlock(&ch->mtx); + mtx_destroy(&ch->mtx); return (error); } From 6ea7128dbd790d27a7154518249ed760054b2f44 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Mon, 25 Oct 2010 08:25:44 +0000 Subject: [PATCH 54/65] Make hw.snd.vpc_0db to be also a loader tunable. --- sys/dev/sound/pcm/channel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sys/dev/sound/pcm/channel.c b/sys/dev/sound/pcm/channel.c index ec7cb9802ac8..2b3b3904c678 100644 --- a/sys/dev/sound/pcm/channel.c +++ b/sys/dev/sound/pcm/channel.c @@ -124,6 +124,7 @@ SYSCTL_INT(_hw_snd, OID_AUTO, vpc_autoreset, CTLFLAG_RW, &chn_vpc_autoreset, 0, "automatically reset channels volume to 0db"); static int chn_vol_0db_pcm = SND_VOL_0DB_PCM; +TUNABLE_INT("hw.snd.vpc_0db", &chn_vol_0db_pcm); static void chn_vpc_proc(int reset, int db) From a38de0134bcd916406d9be5c6c72ff568b9d6a6c Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Mon, 25 Oct 2010 08:30:19 +0000 Subject: [PATCH 55/65] Factor out DDB commands from r204145, r204279 into if_debug.c for further enhancements (1). Switch to a standard 2-clause BSD license for this (2). Unfortunately we have to un-static the ifindex_table for this but do not publicly export it. Suggested by: rwatson (1) a while back. Approved by: thompsa (2) for the change from r204279. 
MFC after: 6 days --- sys/conf/files | 1 + sys/net/if.c | 83 +----------------------------- sys/net/if_debug.c | 125 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 127 insertions(+), 82 deletions(-) create mode 100644 sys/net/if_debug.c diff --git a/sys/conf/files b/sys/conf/files index 34477986ed2a..6620ffe760f5 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -2384,6 +2384,7 @@ net/if_atmsubr.c optional atm net/if_bridge.c optional bridge inet | if_bridge inet net/if_clone.c standard net/if_dead.c standard +net/if_debug.c optional ddb net/if_disc.c optional disc net/if_edsc.c optional edsc net/if_ef.c optional ef diff --git a/sys/net/if.c b/sys/net/if.c index 3c8486aa6b65..17f01e94bf9b 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -33,7 +33,6 @@ #include "opt_compat.h" #include "opt_inet6.h" #include "opt_inet.h" -#include "opt_ddb.h" #include #include @@ -62,10 +61,6 @@ #include #include -#ifdef DDB -#include -#endif - #include #include #include @@ -191,7 +186,7 @@ VNET_DEFINE(struct ifgrouphead, ifg_head); static VNET_DEFINE(int, if_indexlim) = 8; /* Table of ifnet by index. 
*/ -static VNET_DEFINE(struct ifindex_entry *, ifindex_table); +VNET_DEFINE(struct ifindex_entry *, ifindex_table); #define V_if_indexlim VNET(if_indexlim) #define V_ifindex_table VNET(ifindex_table) @@ -3366,79 +3361,3 @@ if_deregister_com_alloc(u_char type) if_com_alloc[type] = NULL; if_com_free[type] = NULL; } - -#ifdef DDB -static void -if_show_ifnet(struct ifnet *ifp) -{ - - if (ifp == NULL) - return; - db_printf("%s:\n", ifp->if_xname); -#define IF_DB_PRINTF(f, e) db_printf(" %s = " f "\n", #e, ifp->e); - IF_DB_PRINTF("%s", if_dname); - IF_DB_PRINTF("%d", if_dunit); - IF_DB_PRINTF("%s", if_description); - IF_DB_PRINTF("%u", if_index); - IF_DB_PRINTF("%u", if_refcount); - IF_DB_PRINTF("%d", if_index_reserved); - IF_DB_PRINTF("%p", if_softc); - IF_DB_PRINTF("%p", if_l2com); - IF_DB_PRINTF("%p", if_vnet); - IF_DB_PRINTF("%p", if_home_vnet); - IF_DB_PRINTF("%p", if_addr); - IF_DB_PRINTF("%p", if_llsoftc); - IF_DB_PRINTF("%p", if_label); - IF_DB_PRINTF("%u", if_pcount); - IF_DB_PRINTF("0x%08x", if_flags); - IF_DB_PRINTF("0x%08x", if_drv_flags); - IF_DB_PRINTF("0x%08x", if_capabilities); - IF_DB_PRINTF("0x%08x", if_capenable); - IF_DB_PRINTF("%p", if_snd.ifq_head); - IF_DB_PRINTF("%p", if_snd.ifq_tail); - IF_DB_PRINTF("%d", if_snd.ifq_len); - IF_DB_PRINTF("%d", if_snd.ifq_maxlen); - IF_DB_PRINTF("%d", if_snd.ifq_drops); - IF_DB_PRINTF("%p", if_snd.ifq_drv_head); - IF_DB_PRINTF("%p", if_snd.ifq_drv_tail); - IF_DB_PRINTF("%d", if_snd.ifq_drv_len); - IF_DB_PRINTF("%d", if_snd.ifq_drv_maxlen); - IF_DB_PRINTF("%d", if_snd.altq_type); - IF_DB_PRINTF("%x", if_snd.altq_flags); -#undef IF_DB_PRINTF -} - -DB_SHOW_COMMAND(ifnet, db_show_ifnet) -{ - - if (!have_addr) { - db_printf("usage: show ifnet \n"); - return; - } - - if_show_ifnet((struct ifnet *)addr); -} - -DB_SHOW_ALL_COMMAND(ifnets, db_show_all_ifnets) -{ - VNET_ITERATOR_DECL(vnet_iter); - struct ifnet *ifp; - u_short idx; - - VNET_FOREACH(vnet_iter) { - CURVNET_SET_QUIET(vnet_iter); -#ifdef VIMAGE - 
db_printf("vnet=%p\n", curvnet); -#endif - for (idx = 1; idx <= V_if_index; idx++) { - ifp = V_ifindex_table[idx].ife_ifnet; - if (ifp == NULL) - continue; - db_printf( "%20s ifp=%p\n", ifp->if_xname, ifp); - if (db_pager_quit) - break; - } - CURVNET_RESTORE(); - } -} -#endif diff --git a/sys/net/if_debug.c b/sys/net/if_debug.c new file mode 100644 index 000000000000..dcf504e5a0d8 --- /dev/null +++ b/sys/net/if_debug.c @@ -0,0 +1,125 @@ +/*- + * Copyright (c) 2010 Bjoern A. Zeeb + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_ddb.h" + +#include +#include +#include + +#ifdef DDB +#include +#endif + +#include +#include +#include +#include + +#ifdef DDB +struct ifindex_entry { + struct ifnet *ife_ifnet; +}; +VNET_DECLARE(struct ifindex_entry *, ifindex_table); +#define V_ifindex_table VNET(ifindex_table) + +static void +if_show_ifnet(struct ifnet *ifp) +{ + + if (ifp == NULL) + return; + db_printf("%s:\n", ifp->if_xname); +#define IF_DB_PRINTF(f, e) db_printf(" %s = " f "\n", #e, ifp->e); + IF_DB_PRINTF("%s", if_dname); + IF_DB_PRINTF("%d", if_dunit); + IF_DB_PRINTF("%s", if_description); + IF_DB_PRINTF("%u", if_index); + IF_DB_PRINTF("%u", if_refcount); + IF_DB_PRINTF("%d", if_index_reserved); + IF_DB_PRINTF("%p", if_softc); + IF_DB_PRINTF("%p", if_l2com); + IF_DB_PRINTF("%p", if_vnet); + IF_DB_PRINTF("%p", if_home_vnet); + IF_DB_PRINTF("%p", if_addr); + IF_DB_PRINTF("%p", if_llsoftc); + IF_DB_PRINTF("%p", if_label); + IF_DB_PRINTF("%u", if_pcount); + IF_DB_PRINTF("0x%08x", if_flags); + IF_DB_PRINTF("0x%08x", if_drv_flags); + IF_DB_PRINTF("0x%08x", if_capabilities); + IF_DB_PRINTF("0x%08x", if_capenable); + IF_DB_PRINTF("%p", if_snd.ifq_head); + IF_DB_PRINTF("%p", if_snd.ifq_tail); + IF_DB_PRINTF("%d", if_snd.ifq_len); + IF_DB_PRINTF("%d", if_snd.ifq_maxlen); + IF_DB_PRINTF("%d", if_snd.ifq_drops); + IF_DB_PRINTF("%p", if_snd.ifq_drv_head); + IF_DB_PRINTF("%p", if_snd.ifq_drv_tail); + IF_DB_PRINTF("%d", if_snd.ifq_drv_len); + IF_DB_PRINTF("%d", if_snd.ifq_drv_maxlen); + IF_DB_PRINTF("%d", if_snd.altq_type); + IF_DB_PRINTF("%x", if_snd.altq_flags); +#undef IF_DB_PRINTF +} + +DB_SHOW_COMMAND(ifnet, db_show_ifnet) +{ + + if (!have_addr) { + db_printf("usage: show ifnet \n"); + return; + } + + if_show_ifnet((struct ifnet *)addr); +} + +DB_SHOW_ALL_COMMAND(ifnets, db_show_all_ifnets) +{ + VNET_ITERATOR_DECL(vnet_iter); + struct ifnet *ifp; + u_short idx; + + VNET_FOREACH(vnet_iter) { + CURVNET_SET_QUIET(vnet_iter); +#ifdef VIMAGE + 
db_printf("vnet=%p\n", curvnet); +#endif + for (idx = 1; idx <= V_if_index; idx++) { + ifp = V_ifindex_table[idx].ife_ifnet; + if (ifp == NULL) + continue; + db_printf( "%20s ifp=%p\n", ifp->if_xname, ifp); + if (db_pager_quit) + break; + } + CURVNET_RESTORE(); + } +} +#endif From 7f25f6c72dcca864beac24d8dd47154da19ef356 Mon Sep 17 00:00:00 2001 From: David Xu Date: Mon, 25 Oct 2010 09:16:04 +0000 Subject: [PATCH 56/65] Get cpuset in pthread_attr_get_np() and free it in pthread_attr_destroy(). MFC after: 7 days --- lib/libthr/thread/thr_attr.c | 39 ++++++++++++++++++++++++--------- lib/libthr/thread/thr_private.h | 2 ++ 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/lib/libthr/thread/thr_attr.c b/lib/libthr/thread/thr_attr.c index fedf42242698..abae3c15a273 100644 --- a/lib/libthr/thread/thr_attr.c +++ b/lib/libthr/thread/thr_attr.c @@ -104,6 +104,8 @@ #include "thr_private.h" +static size_t _get_kern_cpuset_size(void); + __weak_reference(_pthread_attr_destroy, pthread_attr_destroy); int @@ -116,6 +118,8 @@ _pthread_attr_destroy(pthread_attr_t *attr) /* Invalid argument: */ ret = EINVAL; else { + if ((*attr)->cpuset != NULL) + free((*attr)->cpuset); /* Free the memory allocated to the attribute object: */ free(*attr); @@ -132,28 +136,43 @@ _pthread_attr_destroy(pthread_attr_t *attr) __weak_reference(_pthread_attr_get_np, pthread_attr_get_np); int -_pthread_attr_get_np(pthread_t pthread, pthread_attr_t *dst) +_pthread_attr_get_np(pthread_t pthread, pthread_attr_t *dstattr) { struct pthread *curthread; - struct pthread_attr attr; + struct pthread_attr attr, *dst; int ret; + size_t cpusetsize; - if (pthread == NULL || dst == NULL || *dst == NULL) + if (pthread == NULL || dst_attr == NULL || (dst = *dstattr) == NULL) return (EINVAL); - + cpusetsize = _get_kern_cpuset_size(); + if (dst->cpusetsize < cpusetsize) { + char *newset = realloc(dst->cpuset, cpusetsize); + if (newset == NULL) + return (errno); + memset(newset + dst->cpusetsize, 0, cpusetsize - 
+ dst->cpusetsize); + dst->cpuset = (cpuset_t *)newset; + dst->cpusetsize = cpusetsize; + } curthread = _get_curthread(); if ((ret = _thr_find_thread(curthread, pthread, /*include dead*/0)) != 0) return (ret); attr = pthread->attr; if (pthread->flags & THR_FLAGS_DETACHED) attr.flags |= PTHREAD_DETACHED; + ret = cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, TID(pthread), + dst->cpusetsize, dst->cpuset); + if (ret == -1) + ret = errno; THR_THREAD_UNLOCK(curthread, pthread); - - memcpy(*dst, &attr, sizeof(struct pthread_attr)); - /* XXX */ - (*dst)->cpuset = NULL; - (*dst)->cpusetsize = 0; - return (0); + if (ret == 0) { + memcpy(&dst->pthread_attr_start_copy, + &attr.pthread_attr_start_copy, + offsetof(struct pthread_attr, pthread_attr_end_copy) - + offsetof(struct pthread_attr, pthread_attr_start_copy)); + } + return (ret); } __weak_reference(_pthread_attr_getdetachstate, pthread_attr_getdetachstate); diff --git a/lib/libthr/thread/thr_private.h b/lib/libthr/thread/thr_private.h index aa9feefcf7d3..7180d123cae9 100644 --- a/lib/libthr/thread/thr_private.h +++ b/lib/libthr/thread/thr_private.h @@ -230,6 +230,7 @@ struct pthread_atfork { }; struct pthread_attr { +#define pthread_attr_start_copy sched_policy int sched_policy; int sched_inherit; int prio; @@ -239,6 +240,7 @@ struct pthread_attr { void *stackaddr_attr; size_t stacksize_attr; size_t guardsize_attr; +#define pthread_attr_end_copy cpuset cpuset_t *cpuset; size_t cpusetsize; }; From 65df4577974b2a2157f7b2731318413a8452d4ed Mon Sep 17 00:00:00 2001 From: David Xu Date: Mon, 25 Oct 2010 11:16:50 +0000 Subject: [PATCH 57/65] Fix typo. 
--- lib/libthr/thread/thr_attr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/libthr/thread/thr_attr.c b/lib/libthr/thread/thr_attr.c index abae3c15a273..7fe500346275 100644 --- a/lib/libthr/thread/thr_attr.c +++ b/lib/libthr/thread/thr_attr.c @@ -143,7 +143,7 @@ _pthread_attr_get_np(pthread_t pthread, pthread_attr_t *dstattr) int ret; size_t cpusetsize; - if (pthread == NULL || dst_attr == NULL || (dst = *dstattr) == NULL) + if (pthread == NULL || dstattr == NULL || (dst = *dstattr) == NULL) return (EINVAL); cpusetsize = _get_kern_cpuset_size(); if (dst->cpusetsize < cpusetsize) { From 42fe684c1a4bc10f6043eac48385d9e245b4c5b2 Mon Sep 17 00:00:00 2001 From: David Xu Date: Mon, 25 Oct 2010 13:13:16 +0000 Subject: [PATCH 58/65] Use function tdfind() to find a thread. --- sys/kern/kern_cpuset.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/sys/kern/kern_cpuset.c b/sys/kern/kern_cpuset.c index 3bdb45e3bf7c..23aa9df5e6e2 100644 --- a/sys/kern/kern_cpuset.c +++ b/sys/kern/kern_cpuset.c @@ -416,19 +416,10 @@ cpuset_which(cpuwhich_t which, id_t id, struct proc **pp, struct thread **tdp, td = curthread; break; } - sx_slock(&allproc_lock); - FOREACH_PROC_IN_SYSTEM(p) { - PROC_LOCK(p); - FOREACH_THREAD_IN_PROC(p, td) - if (td->td_tid == id) - break; - if (td != NULL) - break; - PROC_UNLOCK(p); - } - sx_sunlock(&allproc_lock); + td = tdfind(id, -1); if (td == NULL) return (ESRCH); + p = td->td_proc; break; case CPU_WHICH_CPUSET: if (id == -1) { From 61eee6b8a7c62b4e2754de1d4c2a14e0ce2782fd Mon Sep 17 00:00:00 2001 From: Ivan Voras Date: Mon, 25 Oct 2010 14:05:25 +0000 Subject: [PATCH 59/65] Reduce the difference between hirunningspace and lorunningspace, it should help interactivity in edge cases. 
--- sys/kern/vfs_bio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 5e2cca2a43b6..ccde4b4ab4bc 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -623,16 +623,16 @@ bufinit(void) lobufspace = hibufspace - MAXBSIZE; /* - * Note: The 16 MB upper limit for hirunningspace was chosen + * Note: The 16 MiB upper limit for hirunningspace was chosen * arbitrarily and may need further tuning. It corresponds to * 128 outstanding write IO requests (if IO size is 128 KiB), * which fits with many RAID controllers' tagged queuing limits. - * The lower 1 MB limit is the historical upper limit for + * The lower 1 MiB limit is the historical upper limit for * hirunningspace. */ hirunningspace = lmax(lmin(roundup(hibufspace / 64, MAXBSIZE), 16 * 1024 * 1024), 1024 * 1024); - lorunningspace = roundup(hirunningspace / 2, MAXBSIZE); + lorunningspace = roundup((hirunningspace * 2) / 3, MAXBSIZE); /* * Limit the amount of malloc memory since it is wired permanently into From c6390f7ac5ddf8ea72664adecf901280b98ced56 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Mon, 25 Oct 2010 15:28:03 +0000 Subject: [PATCH 60/65] Use intr_disable() and intr_restore() instead of frobbing the flags register directly to disable interrupts. 
Reviewed by: bde (earlier version) MFC after: 2 weeks --- sys/amd64/include/profile.h | 10 ++--- sys/i386/i386/elan-mmcr.c | 7 ++-- sys/i386/i386/identcpu.c | 7 ++-- sys/i386/i386/initcpu.c | 79 ++++++++++++++++--------------------- sys/i386/i386/longrun.c | 21 +++++----- sys/i386/include/profile.h | 10 ++--- 6 files changed, 59 insertions(+), 75 deletions(-) diff --git a/sys/amd64/include/profile.h b/sys/amd64/include/profile.h index 0083091daf2c..8fea3711fc54 100644 --- a/sys/amd64/include/profile.h +++ b/sys/amd64/include/profile.h @@ -77,17 +77,17 @@ #error this file needs to be ported to your compiler #endif /* !__GNUCLIKE_ASM */ #else /* !GUPROF */ -#define MCOUNT_DECL(s) u_long s; +#define MCOUNT_DECL(s) register_t s; #ifdef SMP extern int mcount_lock; -#define MCOUNT_ENTER(s) { s = read_rflags(); disable_intr(); \ +#define MCOUNT_ENTER(s) { s = intr_disable(); \ while (!atomic_cmpset_acq_int(&mcount_lock, 0, 1)) \ /* nothing */ ; } #define MCOUNT_EXIT(s) { atomic_store_rel_int(&mcount_lock, 0); \ - write_rflags(s); } + intr_restore(s); } #else -#define MCOUNT_ENTER(s) { s = read_rflags(); disable_intr(); } -#define MCOUNT_EXIT(s) (write_rflags(s)) +#define MCOUNT_ENTER(s) { s = intr_disable(); } +#define MCOUNT_EXIT(s) (intr_restore(s)) #endif #endif /* GUPROF */ diff --git a/sys/i386/i386/elan-mmcr.c b/sys/i386/i386/elan-mmcr.c index b184237b971f..6f6314621814 100644 --- a/sys/i386/i386/elan-mmcr.c +++ b/sys/i386/i386/elan-mmcr.c @@ -228,19 +228,18 @@ elan_poll_pps(struct timecounter *tc) static int state; int i; uint16_t u, x, y, z; - u_long eflags; + register_t saveintr; /* * Grab the HW state as quickly and compactly as we can. Disable * interrupts to avoid measuring our interrupt service time on * hw with quality clock sources. 
*/ - eflags = read_eflags(); - disable_intr(); + saveintr = intr_disable(); x = *pps_ap[0]; /* state, must be first, see below */ y = *pps_ap[1]; /* timer2 */ z = *pps_ap[2]; /* timer1 */ - write_eflags(eflags); + intr_restore(saveintr); /* * Order is important here. We need to check the state of the GPIO diff --git a/sys/i386/i386/identcpu.c b/sys/i386/i386/identcpu.c index 010a29fa0367..afdedc209b7c 100644 --- a/sys/i386/i386/identcpu.c +++ b/sys/i386/i386/identcpu.c @@ -1037,12 +1037,11 @@ identblue(void) static void identifycyrix(void) { - u_int eflags; + register_t saveintr; int ccr2_test = 0, dir_test = 0; u_char ccr2, ccr3; - eflags = read_eflags(); - disable_intr(); + saveintr = intr_disable(); ccr2 = read_cyrix_reg(CCR2); write_cyrix_reg(CCR2, ccr2 ^ CCR2_LOCK_NW); @@ -1067,7 +1066,7 @@ identifycyrix(void) else cyrix_did = 0x00ff; /* Old 486SLC/DLC and TI486SXLC/SXL */ - write_eflags(eflags); + intr_restore(saveintr); } /* Update TSC freq with the value indicated by the caller. */ diff --git a/sys/i386/i386/initcpu.c b/sys/i386/i386/initcpu.c index 75ff6edcbd06..97197829d044 100644 --- a/sys/i386/i386/initcpu.c +++ b/sys/i386/i386/initcpu.c @@ -116,14 +116,13 @@ u_int cpu_mxcsr_mask; /* valid bits in mxcsr */ static void init_bluelightning(void) { - u_long eflags; + register_t saveintr; #if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE) need_post_dma_flush = 1; #endif - eflags = read_eflags(); - disable_intr(); + saveintr = intr_disable(); load_cr0(rcr0() | CR0_CD | CR0_NW); invd(); @@ -144,7 +143,7 @@ init_bluelightning(void) /* Enable caching in CR0. 
*/ load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ invd(); - write_eflags(eflags); + intr_restore(saveintr); } /* @@ -153,11 +152,10 @@ init_bluelightning(void) static void init_486dlc(void) { - u_long eflags; + register_t saveintr; u_char ccr0; - eflags = read_eflags(); - disable_intr(); + saveintr = intr_disable(); invd(); ccr0 = read_cyrix_reg(CCR0); @@ -189,7 +187,7 @@ init_486dlc(void) load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ invd(); #endif /* !CYRIX_CACHE_WORKS */ - write_eflags(eflags); + intr_restore(saveintr); } @@ -199,11 +197,10 @@ init_486dlc(void) static void init_cy486dx(void) { - u_long eflags; + register_t saveintr; u_char ccr2; - eflags = read_eflags(); - disable_intr(); + saveintr = intr_disable(); invd(); ccr2 = read_cyrix_reg(CCR2); @@ -220,7 +217,7 @@ init_cy486dx(void) #endif write_cyrix_reg(CCR2, ccr2); - write_eflags(eflags); + intr_restore(saveintr); } @@ -230,11 +227,10 @@ init_cy486dx(void) static void init_5x86(void) { - u_long eflags; + register_t saveintr; u_char ccr2, ccr3, ccr4, pcr0; - eflags = read_eflags(); - disable_intr(); + saveintr = intr_disable(); load_cr0(rcr0() | CR0_CD | CR0_NW); wbinvd(); @@ -320,29 +316,28 @@ init_5x86(void) /* Lock NW bit in CR0. */ write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_LOCK_NW); - write_eflags(eflags); + intr_restore(saveintr); } #ifdef CPU_I486_ON_386 /* * There are i486 based upgrade products for i386 machines. - * In this case, BIOS doesn't enables CPU cache. + * In this case, BIOS doesn't enable CPU cache. 
*/ static void init_i486_on_386(void) { - u_long eflags; + register_t saveintr; #if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE) need_post_dma_flush = 1; #endif - eflags = read_eflags(); - disable_intr(); + saveintr = intr_disable(); load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0, NW = 0 */ - write_eflags(eflags); + intr_restore(saveintr); } #endif @@ -354,11 +349,10 @@ init_i486_on_386(void) static void init_6x86(void) { - u_long eflags; + register_t saveintr; u_char ccr3, ccr4; - eflags = read_eflags(); - disable_intr(); + saveintr = intr_disable(); load_cr0(rcr0() | CR0_CD | CR0_NW); wbinvd(); @@ -422,7 +416,7 @@ init_6x86(void) /* Lock NW bit in CR0. */ write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_LOCK_NW); - write_eflags(eflags); + intr_restore(saveintr); } #endif /* I486_CPU */ @@ -435,11 +429,10 @@ init_6x86(void) static void init_6x86MX(void) { - u_long eflags; + register_t saveintr; u_char ccr3, ccr4; - eflags = read_eflags(); - disable_intr(); + saveintr = intr_disable(); load_cr0(rcr0() | CR0_CD | CR0_NW); wbinvd(); @@ -489,7 +482,7 @@ init_6x86MX(void) /* Lock NW bit in CR0. 
*/ write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_LOCK_NW); - write_eflags(eflags); + intr_restore(saveintr); } static void @@ -513,11 +506,10 @@ static void init_mendocino(void) { #ifdef CPU_PPRO2CELERON - u_long eflags; + register_t saveintr; u_int64_t bbl_cr_ctl3; - eflags = read_eflags(); - disable_intr(); + saveintr = intr_disable(); load_cr0(rcr0() | CR0_CD | CR0_NW); wbinvd(); @@ -541,7 +533,7 @@ init_mendocino(void) } load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); - write_eflags(eflags); + intr_restore(saveintr); #endif /* CPU_PPRO2CELERON */ } @@ -842,10 +834,9 @@ enable_K6_wt_alloc(void) { quad_t size; u_int64_t whcr; - u_long eflags; + register_t saveintr; - eflags = read_eflags(); - disable_intr(); + saveintr = intr_disable(); wbinvd(); #ifdef CPU_DISABLE_CACHE @@ -895,7 +886,7 @@ enable_K6_wt_alloc(void) #endif wrmsr(0x0c0000082, whcr); - write_eflags(eflags); + intr_restore(saveintr); } void @@ -903,10 +894,9 @@ enable_K6_2_wt_alloc(void) { quad_t size; u_int64_t whcr; - u_long eflags; + register_t saveintr; - eflags = read_eflags(); - disable_intr(); + saveintr = intr_disable(); wbinvd(); #ifdef CPU_DISABLE_CACHE @@ -956,7 +946,7 @@ enable_K6_2_wt_alloc(void) #endif wrmsr(0x0c0000082, whcr); - write_eflags(eflags); + intr_restore(saveintr); } #endif /* I585_CPU && CPU_WT_ALLOC */ @@ -966,15 +956,14 @@ enable_K6_2_wt_alloc(void) DB_SHOW_COMMAND(cyrixreg, cyrixreg) { - u_long eflags; + register_t saveintr; u_int cr0; u_char ccr1, ccr2, ccr3; u_char ccr0 = 0, ccr4 = 0, ccr5 = 0, pcr0 = 0; cr0 = rcr0(); if (cpu_vendor_id == CPU_VENDOR_CYRIX) { - eflags = read_eflags(); - disable_intr(); + saveintr = intr_disable(); if ((cpu != CPU_M1SC) && (cpu != CPU_CY486DX)) { @@ -992,7 +981,7 @@ DB_SHOW_COMMAND(cyrixreg, cyrixreg) pcr0 = read_cyrix_reg(PCR0); write_cyrix_reg(CCR3, ccr3); /* Restore CCR3. 
*/ } - write_eflags(eflags); + intr_restore(saveintr); if ((cpu != CPU_M1SC) && (cpu != CPU_CY486DX)) printf("CCR0=%x, ", (u_int)ccr0); diff --git a/sys/i386/i386/longrun.c b/sys/i386/i386/longrun.c index a75726e00c46..08fe3b1b27b1 100644 --- a/sys/i386/i386/longrun.c +++ b/sys/i386/i386/longrun.c @@ -84,12 +84,11 @@ static u_int32_t longrun_modes[LONGRUN_MODE_MAX][3] = { static u_int tmx86_get_longrun_mode(void) { - u_long eflags; + register_t saveintr; union msrinfo msrinfo; u_int low, high, flags, mode; - eflags = read_eflags(); - disable_intr(); + saveintr = intr_disable(); msrinfo.msr = rdmsr(MSR_TMx86_LONGRUN); low = LONGRUN_MODE_MASK(msrinfo.regs[0]); @@ -105,40 +104,38 @@ tmx86_get_longrun_mode(void) } mode = LONGRUN_MODE_UNKNOWN; out: - write_eflags(eflags); + intr_restore(saveintr); return (mode); } static u_int tmx86_get_longrun_status(u_int * frequency, u_int * voltage, u_int * percentage) { - u_long eflags; + register_t saveintr; u_int regs[4]; - eflags = read_eflags(); - disable_intr(); + saveintr = intr_disable(); do_cpuid(0x80860007, regs); *frequency = regs[0]; *voltage = regs[1]; *percentage = regs[2]; - write_eflags(eflags); + intr_restore(saveintr); return (1); } static u_int tmx86_set_longrun_mode(u_int mode) { - u_long eflags; + register_t saveintr; union msrinfo msrinfo; if (mode >= LONGRUN_MODE_UNKNOWN) { return (0); } - eflags = read_eflags(); - disable_intr(); + saveintr = intr_disable(); /* Write LongRun mode values to Model Specific Register. 
*/ msrinfo.msr = rdmsr(MSR_TMx86_LONGRUN); @@ -153,7 +150,7 @@ tmx86_set_longrun_mode(u_int mode) msrinfo.regs[0] = (msrinfo.regs[0] & ~0x01) | longrun_modes[mode][2]; wrmsr(MSR_TMx86_LONGRUN_FLAGS, msrinfo.msr); - write_eflags(eflags); + intr_restore(saveintr); return (1); } diff --git a/sys/i386/include/profile.h b/sys/i386/include/profile.h index fb3320b6465d..d4db0c9d829d 100644 --- a/sys/i386/include/profile.h +++ b/sys/i386/include/profile.h @@ -77,17 +77,17 @@ #error #endif /* !__GNUCLIKE_ASM */ #else /* !GUPROF */ -#define MCOUNT_DECL(s) u_long s; +#define MCOUNT_DECL(s) register_t s; #ifdef SMP extern int mcount_lock; -#define MCOUNT_ENTER(s) { s = read_eflags(); disable_intr(); \ +#define MCOUNT_ENTER(s) { s = intr_disable(); \ while (!atomic_cmpset_acq_int(&mcount_lock, 0, 1)) \ /* nothing */ ; } #define MCOUNT_EXIT(s) { atomic_store_rel_int(&mcount_lock, 0); \ - write_eflags(s); } + intr_restore(s); } #else -#define MCOUNT_ENTER(s) { s = read_eflags(); disable_intr(); } -#define MCOUNT_EXIT(s) (write_eflags(s)) +#define MCOUNT_ENTER(s) { s = intr_disable(); } +#define MCOUNT_EXIT(s) (intr_restore(s)) #endif #endif /* GUPROF */ From 0689bdcc194b269832922eee66a89ebbc118651a Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Mon, 25 Oct 2010 15:31:13 +0000 Subject: [PATCH 61/65] Use 'saveintr' instead of 'savecrit' or 'eflags' to hold the state returned by intr_disable(). 
Requested by: bde --- sys/amd64/amd64/fpu.c | 6 +++--- sys/i386/i386/initcpu.c | 6 +++--- sys/i386/i386/perfmon.c | 18 +++++++++--------- sys/i386/isa/npx.c | 6 +++--- sys/x86/x86/local_apic.c | 12 ++++++------ 5 files changed, 24 insertions(+), 24 deletions(-) diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c index 5e05ecd3f710..d75df64943fb 100644 --- a/sys/amd64/amd64/fpu.c +++ b/sys/amd64/amd64/fpu.c @@ -113,14 +113,14 @@ static struct savefpu fpu_initialstate; void fpuinit(void) { - register_t savecrit; + register_t saveintr; u_int mxcsr; u_short control; /* * It is too early for critical_enter() to work on AP. */ - savecrit = intr_disable(); + saveintr = intr_disable(); stop_emulating(); fninit(); control = __INITIAL_FPUCW__; @@ -137,7 +137,7 @@ fpuinit(void) bzero(fpu_initialstate.sv_xmm, sizeof(fpu_initialstate.sv_xmm)); } start_emulating(); - intr_restore(savecrit); + intr_restore(saveintr); } /* diff --git a/sys/i386/i386/initcpu.c b/sys/i386/i386/initcpu.c index 97197829d044..902076532a9e 100644 --- a/sys/i386/i386/initcpu.c +++ b/sys/i386/i386/initcpu.c @@ -787,14 +787,14 @@ void enable_K5_wt_alloc(void) { u_int64_t msr; - register_t savecrit; + register_t saveintr; /* * Write allocate is supported only on models 1, 2, and 3, with * a stepping of 4 or greater. 
*/ if (((cpu_id & 0xf0) > 0) && ((cpu_id & 0x0f) > 3)) { - savecrit = intr_disable(); + saveintr = intr_disable(); msr = rdmsr(0x83); /* HWCR */ wrmsr(0x83, msr & !(0x10)); @@ -825,7 +825,7 @@ enable_K5_wt_alloc(void) msr=rdmsr(0x83); wrmsr(0x83, msr|0x10); /* enable write allocate */ - intr_restore(savecrit); + intr_restore(saveintr); } } diff --git a/sys/i386/i386/perfmon.c b/sys/i386/i386/perfmon.c index d085261547c2..c61e8fe04067 100644 --- a/sys/i386/i386/perfmon.c +++ b/sys/i386/i386/perfmon.c @@ -128,18 +128,18 @@ perfmon_avail(void) int perfmon_setup(int pmc, unsigned int control) { - register_t savecrit; + register_t saveintr; if (pmc < 0 || pmc >= NPMC) return EINVAL; perfmon_inuse |= (1 << pmc); control &= ~(PMCF_SYS_FLAGS << 16); - savecrit = intr_disable(); + saveintr = intr_disable(); ctl_shadow[pmc] = control; writectl(pmc); wrmsr(msr_pmc[pmc], pmc_shadow[pmc] = 0); - intr_restore(savecrit); + intr_restore(saveintr); return 0; } @@ -174,17 +174,17 @@ perfmon_fini(int pmc) int perfmon_start(int pmc) { - register_t savecrit; + register_t saveintr; if (pmc < 0 || pmc >= NPMC) return EINVAL; if (perfmon_inuse & (1 << pmc)) { - savecrit = intr_disable(); + saveintr = intr_disable(); ctl_shadow[pmc] |= (PMCF_EN << 16); wrmsr(msr_pmc[pmc], pmc_shadow[pmc]); writectl(pmc); - intr_restore(savecrit); + intr_restore(saveintr); return 0; } return EBUSY; @@ -193,17 +193,17 @@ perfmon_start(int pmc) int perfmon_stop(int pmc) { - register_t savecrit; + register_t saveintr; if (pmc < 0 || pmc >= NPMC) return EINVAL; if (perfmon_inuse & (1 << pmc)) { - savecrit = intr_disable(); + saveintr = intr_disable(); pmc_shadow[pmc] = rdmsr(msr_pmc[pmc]) & 0xffffffffffULL; ctl_shadow[pmc] &= ~(PMCF_EN << 16); writectl(pmc); - intr_restore(savecrit); + intr_restore(saveintr); return 0; } return EBUSY; diff --git a/sys/i386/isa/npx.c b/sys/i386/isa/npx.c index 72a6aa860be9..f8dc11c6cb9b 100644 --- a/sys/i386/isa/npx.c +++ b/sys/i386/isa/npx.c @@ -343,7 +343,7 @@ void 
npxinit(void) { static union savefpu dummy; - register_t savecrit; + register_t saveintr; u_short control; if (!hw_float) @@ -355,7 +355,7 @@ npxinit(void) * * It is too early for critical_enter() to work on AP. */ - savecrit = intr_disable(); + saveintr = intr_disable(); npxsave(&dummy); stop_emulating(); #ifdef CPU_ENABLE_SSE @@ -366,7 +366,7 @@ npxinit(void) control = __INITIAL_NPXCW__; fldcw(control); start_emulating(); - intr_restore(savecrit); + intr_restore(saveintr); } /* diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c index 6d7a53b5fdc4..6c72d2859cfe 100644 --- a/sys/x86/x86/local_apic.c +++ b/sys/x86/x86/local_apic.c @@ -341,12 +341,12 @@ lapic_setup(int boot) { struct lapic *la; u_int32_t maxlvt; - register_t eflags; + register_t saveintr; char buf[MAXCOMLEN + 1]; la = &lapics[lapic_id()]; KASSERT(la->la_present, ("missing APIC structure")); - eflags = intr_disable(); + saveintr = intr_disable(); maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; /* Initialize the TPR to allow all interrupts. */ @@ -393,7 +393,7 @@ lapic_setup(int boot) if (maxlvt >= LVT_CMCI) lapic->lvt_cmci = lvt_mode(la, LVT_CMCI, lapic->lvt_cmci); - intr_restore(eflags); + intr_restore(saveintr); } void @@ -1415,7 +1415,7 @@ lapic_ipi_wait(int delay) void lapic_ipi_raw(register_t icrlo, u_int dest) { - register_t value, eflags; + register_t value, saveintr; /* XXX: Need more sanity checking of icrlo? */ KASSERT(lapic != NULL, ("%s called too early", __func__)); @@ -1425,7 +1425,7 @@ lapic_ipi_raw(register_t icrlo, u_int dest) ("%s: reserved bits set in ICR LO register", __func__)); /* Set destination in ICR HI register if it is being used. 
*/ - eflags = intr_disable(); + saveintr = intr_disable(); if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) { value = lapic->icr_hi; value &= ~APIC_ID_MASK; @@ -1438,7 +1438,7 @@ lapic_ipi_raw(register_t icrlo, u_int dest) value &= APIC_ICRLO_RESV_MASK; value |= icrlo; lapic->icr_lo = value; - intr_restore(eflags); + intr_restore(saveintr); } #define BEFORE_SPIN 1000000 From 111044e6c2cc192cefc6264b9aad14368f4ac0b4 Mon Sep 17 00:00:00 2001 From: Nathan Whitehorn Date: Mon, 25 Oct 2010 15:41:12 +0000 Subject: [PATCH 62/65] Don't create spurious /dev entries. Submitted by: andreast --- sys/powerpc/mambo/mambo_console.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sys/powerpc/mambo/mambo_console.c b/sys/powerpc/mambo/mambo_console.c index 47e32ce3170e..a5ef2f9de35d 100644 --- a/sys/powerpc/mambo/mambo_console.c +++ b/sys/powerpc/mambo/mambo_console.c @@ -86,7 +86,6 @@ cn_drvinit(void *unused) tp = tty_alloc(&mambo_ttydevsw, NULL); tty_init_console(tp, 0); tty_makedev(tp, NULL, "%s", "mambocons"); - tty_makealias(tp, "mambocons"); polltime = 1; From 495ed64c167cc837bc1f1db870d43383456c3558 Mon Sep 17 00:00:00 2001 From: Nathan Whitehorn Date: Mon, 25 Oct 2010 15:51:43 +0000 Subject: [PATCH 63/65] The EHCI_CAPLENGTH and EHCI_HCIVERSION registers are actually sub-registers within the first 4 bytes of the EHCI memory space. For controllers that use big-endian MMIO, reading them with 1- and 2-byte reads would then return the wrong values. Instead, read the combined register with a 4-byte read and mask out the interesting quantities. 
--- sys/dev/pci/pci.c | 2 +- sys/dev/usb/controller/ehci.c | 4 ++-- sys/dev/usb/controller/ehcireg.h | 10 +++++++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c index ef80f815d94b..6069b6da7d82 100644 --- a/sys/dev/pci/pci.c +++ b/sys/dev/pci/pci.c @@ -2803,7 +2803,7 @@ ehci_early_takeover(device_t self) "SMM does not respond\n"); } /* Disable interrupts */ - offs = bus_read_1(res, EHCI_CAPLENGTH); + offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION)); bus_write_4(res, offs + EHCI_USBINTR, 0); } bus_release_resource(self, SYS_RES_MEMORY, rid, res); diff --git a/sys/dev/usb/controller/ehci.c b/sys/dev/usb/controller/ehci.c index 91119823c818..329c9d861efd 100644 --- a/sys/dev/usb/controller/ehci.c +++ b/sys/dev/usb/controller/ehci.c @@ -270,9 +270,9 @@ ehci_init(ehci_softc_t *sc) } #endif - sc->sc_offs = EREAD1(sc, EHCI_CAPLENGTH); + sc->sc_offs = EHCI_CAPLENGTH(EREAD4(sc, EHCI_CAPLEN_HCIVERSION)); - version = EREAD2(sc, EHCI_HCIVERSION); + version = EHCI_HCIVERSION(EREAD4(sc, EHCI_CAPLEN_HCIVERSION)); device_printf(sc->sc_bus.bdev, "EHCI version %x.%x\n", version >> 8, version & 0xff); diff --git a/sys/dev/usb/controller/ehcireg.h b/sys/dev/usb/controller/ehcireg.h index ab81f08cf302..1f5fc5c06cad 100644 --- a/sys/dev/usb/controller/ehcireg.h +++ b/sys/dev/usb/controller/ehcireg.h @@ -54,9 +54,13 @@ #define EHCI_LEGSUP_USBLEGCTLSTS 0x04 /* EHCI capability registers */ -#define EHCI_CAPLENGTH 0x00 /* RO Capability register length field */ -#define EHCI_RESERVED 0x01 /* Reserved register */ -#define EHCI_HCIVERSION 0x02 /* RO Interface version number */ +#define EHCI_CAPLEN_HCIVERSION 0x00 /* RO Capability register length + * (least-significant byte) and + * interface version number (two + * most significant) + */ +#define EHCI_CAPLENGTH(x) ((x) & 0xff) +#define EHCI_HCIVERSION(x) (((x) >> 16) & 0xffff) #define EHCI_HCSPARAMS 0x04 /* RO Structural parameters */ #define EHCI_HCS_DEBUGPORT(x) (((x) >> 
20) & 0xf) #define EHCI_HCS_P_INDICATOR(x) ((x) & 0x10000) From 94294cada52e7aa758269946275bcf67b9c0d864 Mon Sep 17 00:00:00 2001 From: Thomas Quinot Date: Mon, 25 Oct 2010 16:11:37 +0000 Subject: [PATCH 64/65] Fix typo in comment. --- sys/netipsec/ipsec_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/netipsec/ipsec_input.c b/sys/netipsec/ipsec_input.c index 0710d34c264b..9b0224745ce9 100644 --- a/sys/netipsec/ipsec_input.c +++ b/sys/netipsec/ipsec_input.c @@ -108,7 +108,7 @@ static void ipsec4_common_ctlinput(int, struct sockaddr *, void *, int); /* * ipsec_common_input gets called when an IPsec-protected packet - * is received by IPv4 or IPv6. It's job is to find the right SA + * is received by IPv4 or IPv6. Its job is to find the right SA * and call the appropriate transform. The transform callback * takes care of further processing (like ingress filtering). */ From e7926a370374868c62008bc8b7804cb03bb92e3f Mon Sep 17 00:00:00 2001 From: "Andrey V. Elsukov" Date: Mon, 25 Oct 2010 16:23:35 +0000 Subject: [PATCH 65/65] Reimplemented "gpart destroy -F". Now it does all work in kernel. This was needed for recover implementation. Implement the recover command for GPT. Now GPT will marked as corrupt when any of three types of corruption will be detected: 1. Damaged primary GPT header or table 2. Damaged secondary GPT header or table 3. Secondary header is not located in the last LBA Marked GPT becomes read-only. Any changes with corrupt table are prohibited. Only "destroy" and "recover" commands are allowed. Discussed with: geom@ (mostly silence) Tested by: Ilya A. 
Arhipov Approved by: mav (mentor) MFC after: 2 weeks --- sbin/geom/class/part/geom_part.c | 96 +++++--------------------------- sbin/geom/class/part/gpart.8 | 87 ++++++++++++++++++++++++++++- sys/geom/part/g_part.c | 83 +++++++++++++++++++++++++-- sys/geom/part/g_part.h | 3 + sys/geom/part/g_part_gpt.c | 93 +++++++++++++++++++++++++------ sys/geom/part/g_part_if.m | 11 ++++ 6 files changed, 267 insertions(+), 106 deletions(-) diff --git a/sbin/geom/class/part/geom_part.c b/sbin/geom/class/part/geom_part.c index 55db274052e5..15bfa0bef28b 100644 --- a/sbin/geom/class/part/geom_part.c +++ b/sbin/geom/class/part/geom_part.c @@ -67,7 +67,6 @@ static char ssize[32]; #define GPART_PARAM_BOOTCODE "bootcode" #define GPART_PARAM_INDEX "index" #define GPART_PARAM_PARTCODE "partcode" -#define GPART_PARAM_FORCE "force" static struct gclass *find_class(struct gmesh *, const char *); static struct ggeom * find_geom(struct gclass *, const char *); @@ -85,7 +84,6 @@ static void gpart_show_geom(struct ggeom *, const char *); static int gpart_show_hasopt(struct gctl_req *, const char *, const char *); static void gpart_write_partcode(struct ggeom *, int, void *, ssize_t); static void gpart_write_partcode_vtoc8(struct ggeom *, int, void *); -static void gpart_destroy(struct gctl_req *, unsigned int); static void gpart_print_error(const char *); struct g_command PUBSYM(class_commands)[] = { @@ -123,8 +121,8 @@ struct g_command PUBSYM(class_commands)[] = { G_OPT_SENTINEL }, "-i index [-f flags] geom" }, - { "destroy", 0, gpart_destroy, { - { 'F', GPART_PARAM_FORCE, NULL, G_TYPE_BOOL }, + { "destroy", 0, gpart_issue, { + { 'F', "force", NULL, G_TYPE_BOOL }, { 'f', "flags", GPART_FLAGS, G_TYPE_STRING }, G_OPT_SENTINEL }, "[-F] [-f flags] geom" @@ -167,6 +165,11 @@ struct g_command PUBSYM(class_commands)[] = { G_OPT_SENTINEL }, "[-s size] -i index [-f flags] geom" }, + { "recover", 0, gpart_issue, { + { 'f', "flags", GPART_FLAGS, G_TYPE_STRING }, + G_OPT_SENTINEL }, + "[-f flags] geom" + 
}, G_CMD_SENTINEL }; @@ -539,13 +542,17 @@ gpart_show_geom(struct ggeom *gp, const char *element) s = find_geomcfg(gp, "last"); last = (off_t)strtoimax(s, NULL, 0); wblocks = strlen(s); + s = find_geomcfg(gp, "state"); + if (s != NULL && *s != 'C') + s = NULL; wname = strlen(gp->lg_name); pp = LIST_FIRST(&gp->lg_consumer)->lg_provider; secsz = pp->lg_sectorsize; - printf("=>%*jd %*jd %*s %s (%s)\n", + printf("=>%*jd %*jd %*s %s (%s)%s\n", wblocks, (intmax_t)first, wblocks, (intmax_t)(last - first + 1), wname, gp->lg_name, - scheme, fmtsize(pp->lg_mediasize)); + scheme, fmtsize(pp->lg_mediasize), + s ? " [CORRUPT]": ""); while ((pp = find_provider(gp, first)) != NULL) { s = find_provcfg(pp, "start"); @@ -857,83 +864,6 @@ gpart_bootcode(struct gctl_req *req, unsigned int fl) geom_deletetree(&mesh); } -static void -gpart_destroy(struct gctl_req *req, unsigned int fl) -{ - struct gmesh mesh; - struct gclass *classp; - struct gctl_req *req2; - struct ggeom *gp; - struct gprovider *pp; - const char *s; - int error, val; - intmax_t idx; - - if (gctl_has_param(req, GPART_PARAM_FORCE)) { - val = gctl_get_int(req, GPART_PARAM_FORCE); - error = gctl_delete_param(req, GPART_PARAM_FORCE); - if (error) - errc(EXIT_FAILURE, error, "internal error"); - if (val == 0) - goto done; - s = gctl_get_ascii(req, "class"); - if (s == NULL) - abort(); - error = geom_gettree(&mesh); - if (error != 0) - errc(EXIT_FAILURE, error, "Cannot get GEOM tree"); - classp = find_class(&mesh, s); - if (classp == NULL) { - geom_deletetree(&mesh); - errx(EXIT_FAILURE, "Class %s not found.", s); - } - s = gctl_get_ascii(req, "arg0"); - if (s == NULL) - abort(); - gp = find_geom(classp, s); - if (gp == NULL) - errx(EXIT_FAILURE, "No such geom: %s.", s); - val = 0; - LIST_FOREACH(pp, &gp->lg_provider, lg_provider){ - s = find_provcfg(pp, "index"); - if (s == NULL) - errx(EXIT_FAILURE, "Index not found for %s.", - pp->lg_name); - idx = strtoimax(s, NULL, 0); - req2 = gctl_get_handle(); - gctl_ro_param(req2, 
"class", -1, classp->lg_name); - gctl_ro_param(req2, "arg0", -1, gp->lg_name); - gctl_ro_param(req2, "verb", -1, "delete"); - gctl_ro_param(req2, GPART_PARAM_INDEX, - sizeof(intmax_t), &idx); - gctl_ro_param(req2, "flags", -1, "X"); - s = gctl_issue(req2); - if (s != NULL && s[0] != '\0') { - gpart_print_error(s); - gctl_free(req2); - if (val) { /* try to undo changes */ - req2 = gctl_get_handle(); - gctl_ro_param(req2, "verb", -1, - "undo"); - gctl_ro_param(req2, "class", -1, - classp->lg_name); - gctl_ro_param(req2, "arg0", -1, - gp->lg_name); - gctl_issue(req2); - gctl_free(req2); - } - geom_deletetree(&mesh); - exit(EXIT_FAILURE); - } - gctl_free(req2); - val = 1; - } - geom_deletetree(&mesh); - } -done: - gpart_issue(req, fl); -} - static void gpart_print_error(const char *errstr) { diff --git a/sbin/geom/class/part/gpart.8 b/sbin/geom/class/part/gpart.8 index cb9b5c9ed499..f01e56a4a7ec 100644 --- a/sbin/geom/class/part/gpart.8 +++ b/sbin/geom/class/part/gpart.8 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd September 28, 2010 +.Dd October 25, 2010 .Dt GPART 8 .Os .Sh NAME @@ -129,6 +129,11 @@ utility: .Op Fl t Ar type .Op Fl f Ar flags .Ar geom +.\" ==== RECOVER ==== +.Nm +.Cm recover +.Op Fl f Ar flags +.Ar geom .\" ==== RESIZE ==== .Nm .Cm resize @@ -353,6 +358,23 @@ See the section entitled below for a discussion about its use. .El +.\" ==== RECOVER ==== +.It Cm recover +Recover corrupt partition's scheme metadata on the geom +.Ar geom . +See the section entitled +.Sx "RECOVERING" +below for the additional information. +.Pp +Additional options include: +.Bl -tag -width 10n +.It Fl f Ar flags +Additional operational flags. +See the section entitled +.Sx "OPERATIONAL FLAGS" +below for a discussion +about its use. +.El .\" ==== RESIZE ==== .It Cm resize Resize a partition from geom @@ -629,6 +651,68 @@ the action or reverted with the .Cm undo action. +.Sh RECOVERING +The GEOM class PART supports recovering of partition tables only for GPT. 
+The GUID partition table has a primary and secondary (backup) copy of +metadata for redundance. They are stored in the begining and in the end +of device respectively. Therefore it is acceptable to have some corruptions +in the metadata that are not fatal to work with GPT. When kernel detects +corrupt metadata it marks this table as corrupt and reports about corruption. +Any changes in corrupt table are prohibited except +.Cm destroy +and +.Cm recover . +.Pp +In case when only first sector is corrupt kernel can not detect GPT even +if partition table is not corrupt. You can write protective MBR with +.Xr dd 1 +command to restore ability of GPT detection. The copy of protective MBR is +usually located in the +.Pa /boot/pmbr +file. +.Pp +In case when some of metadata is corrupt you will get to know about this +from kernel's messages like these: +.Bd -literal -offset indent +GEOM: provider: the primary GPT table is corrupt or invalid. +GEOM: provider: using the secondary instead -- recovery strongly advised. +.Ed +.Pp +or +.Bd -literal -offset indent +GEOM: provider: the secondary GPT table is corrupt or invalid. +GEOM: provider: using the primary only -- recovery suggested. +.Ed +.Pp +Also +.Cm gpart +commands like +.Cm show , +.Cm status +and +.Cm list +will report about corrupt table. +.Pp +In case when the size of device has changed (e.g. volume expansion) the +secondary GPT header will become located not in the last sector. This is +not a metadata corruption, but it is dangerous because any corruption of +the primary GPT will lead to lost of partition table. Kernel reports about +this problem with message: +.Bd -literal -offset indent +GEOM: provider: the secondary GPT header is not in the last LBA. +.Ed +.Pp +A corrupt table can be recovered with +.Cm gpart recover +command. This command does reconstruction of corrupt metadata using +known valid metadata. Also it can relocate secondary GPT to the end of +device. 
+.Pp +.Pa NOTE : +The GEOM class PART can detect the same partition table on different GEOM +providers and some of them will be marked as corrupt. Be careful when choosing +a provider for recovery. If you make an incorrect choice you can destroy +metadata of another GEOM class, e.g. GEOM MIRROR or GEOM LABEL. .Sh EXIT STATUS Exit status is 0 on success, and 1 if the command fails. .Sh EXAMPLES @@ -687,6 +771,7 @@ After having created all required partitions, embed bootstrap code into them. /sbin/gpart bootcode -p /boot/boot1 da0 .Ed .Sh SEE ALSO +.Xr dd 1 , .Xr geom 4 , .Xr geom 8 .Sh HISTORY diff --git a/sys/geom/part/g_part.c b/sys/geom/part/g_part.c index d88213b1d65a..155daae02982 100644 --- a/sys/geom/part/g_part.c +++ b/sys/geom/part/g_part.c @@ -924,7 +924,7 @@ g_part_ctl_destroy(struct gctl_req *req, struct g_part_parms *gpp) struct g_consumer *cp; struct g_geom *gp; struct g_provider *pp; - struct g_part_entry *entry; + struct g_part_entry *entry, *tmp; struct g_part_table *null, *table; struct sbuf *sb; int error; @@ -934,13 +934,34 @@ g_part_ctl_destroy(struct gctl_req *req, struct g_part_parms *gpp) g_topology_assert(); table = gp->softc; + /* Check for busy providers. */ LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { if (entry->gpe_deleted || entry->gpe_internal) continue; + if (gpp->gpp_force) { + pp = entry->gpe_pp; + if (pp == NULL) + continue; + if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) + continue; + } gctl_error(req, "%d", EBUSY); return (EBUSY); } + if (gpp->gpp_force) { + /* Destroy all providers. 
*/ + LIST_FOREACH_SAFE(entry, &table->gpt_entry, gpe_entry, tmp) { + pp = entry->gpe_pp; + if (pp != NULL) { + pp->private = NULL; + g_wither_provider(pp, ENXIO); + } + LIST_REMOVE(entry, gpe_entry); + g_free(entry); + } + } + error = G_PART_DESTROY(table, gpp); if (error) { gctl_error(req, "%d", error); @@ -1037,8 +1058,39 @@ g_part_ctl_move(struct gctl_req *req, struct g_part_parms *gpp) static int g_part_ctl_recover(struct gctl_req *req, struct g_part_parms *gpp) { - gctl_error(req, "%d verb 'recover'", ENOSYS); - return (ENOSYS); + struct g_part_table *table; + struct g_geom *gp; + struct sbuf *sb; + int error, recovered; + + gp = gpp->gpp_geom; + G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); + g_topology_assert(); + table = gp->softc; + error = recovered = 0; + + if (table->gpt_corrupt) { + error = G_PART_RECOVER(table); + if (error) { + gctl_error(req, "%d recovering '%s' failed", + error, gp->name); + return (error); + } + recovered = 1; + } + /* Provide feedback if so requested. 
*/ + if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { + sb = sbuf_new_auto(); + if (recovered) + sbuf_printf(sb, "%s recovered\n", gp->name); + else + sbuf_printf(sb, "%s recovering is not needed\n", + gp->name); + sbuf_finish(sb); + gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); + sbuf_delete(sb); + } + return (0); } static int @@ -1341,6 +1393,7 @@ g_part_ctlreq(struct gctl_req *req, struct g_class *mp, const char *verb) } else if (!strcmp(verb, "destroy")) { ctlreq = G_PART_CTL_DESTROY; mparms |= G_PART_PARM_GEOM; + oparms |= G_PART_PARM_FORCE; } break; case 'm': @@ -1415,6 +1468,8 @@ g_part_ctlreq(struct gctl_req *req, struct g_class *mp, const char *verb) case 'f': if (!strcmp(ap->name, "flags")) parm = G_PART_PARM_FLAGS; + else if (!strcmp(ap->name, "force")) + parm = G_PART_PARM_FORCE; break; case 'i': if (!strcmp(ap->name, "index")) @@ -1453,7 +1508,8 @@ g_part_ctlreq(struct gctl_req *req, struct g_class *mp, const char *verb) } switch (parm) { case G_PART_PARM_ATTRIB: - error = g_part_parm_str(req, ap->name, &gpp.gpp_attrib); + error = g_part_parm_str(req, ap->name, + &gpp.gpp_attrib); break; case G_PART_PARM_BOOTCODE: error = g_part_parm_bootcode(req, ap->name, @@ -1466,11 +1522,16 @@ g_part_ctlreq(struct gctl_req *req, struct g_class *mp, const char *verb) case G_PART_PARM_FLAGS: error = g_part_parm_str(req, ap->name, &gpp.gpp_flags); break; + case G_PART_PARM_FORCE: + error = g_part_parm_uint32(req, ap->name, + &gpp.gpp_force); + break; case G_PART_PARM_GEOM: error = g_part_parm_geom(req, ap->name, &gpp.gpp_geom); break; case G_PART_PARM_INDEX: - error = g_part_parm_intmax(req, ap->name, &gpp.gpp_index); + error = g_part_parm_intmax(req, ap->name, + &gpp.gpp_index); break; case G_PART_PARM_LABEL: error = g_part_parm_str(req, ap->name, &gpp.gpp_label); @@ -1490,7 +1551,8 @@ g_part_ctlreq(struct gctl_req *req, struct g_class *mp, const char *verb) error = g_part_parm_quad(req, ap->name, &gpp.gpp_size); break; case G_PART_PARM_START: - error = 
g_part_parm_quad(req, ap->name, &gpp.gpp_start); + error = g_part_parm_quad(req, ap->name, + &gpp.gpp_start); break; case G_PART_PARM_TYPE: error = g_part_parm_str(req, ap->name, &gpp.gpp_type); @@ -1524,6 +1586,13 @@ g_part_ctlreq(struct gctl_req *req, struct g_class *mp, const char *verb) table = NULL; if (modifies && (gpp.gpp_parms & G_PART_PARM_GEOM)) { table = gpp.gpp_geom->softc; + if (table != NULL && table->gpt_corrupt && + ctlreq != G_PART_CTL_DESTROY && + ctlreq != G_PART_CTL_RECOVER) { + gctl_error(req, "%d table '%s' is corrupt", + EPERM, gpp.gpp_geom->name); + return; + } if (table != NULL && !table->gpt_opened) { error = g_access(LIST_FIRST(&gpp.gpp_geom->consumer), 1, 1, 1); @@ -1789,6 +1858,8 @@ g_part_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, table->gpt_sectors); sbuf_printf(sb, "%s%u\n", indent, table->gpt_heads); + sbuf_printf(sb, "%s%s\n", indent, + table->gpt_corrupt ? "CORRUPT": "OK"); G_PART_DUMPCONF(table, NULL, sb, indent); } } diff --git a/sys/geom/part/g_part.h b/sys/geom/part/g_part.h index 63cb06925568..e6722c378063 100644 --- a/sys/geom/part/g_part.h +++ b/sys/geom/part/g_part.h @@ -132,6 +132,7 @@ struct g_part_table { int gpt_modified:1; /* Table changes have been made. */ int gpt_opened:1; /* Permissions obtained. */ int gpt_fixgeom:1; /* Geometry is fixed. */ + int gpt_corrupt:1; /* Table is corrupt. 
*/ }; struct g_part_entry *g_part_new_entry(struct g_part_table *, int, quad_t, @@ -169,6 +170,7 @@ enum g_part_ctl { #define G_PART_PARM_VERSION 0x0800 #define G_PART_PARM_BOOTCODE 0x1000 #define G_PART_PARM_ATTRIB 0x2000 +#define G_PART_PARM_FORCE 0x4000 struct g_part_parms { unsigned int gpp_parms; @@ -186,6 +188,7 @@ struct g_part_parms { const void *gpp_codeptr; unsigned int gpp_codesize; const char *gpp_attrib; + unsigned int gpp_force; }; void g_part_geometry_heads(off_t, u_int, off_t *, u_int *); diff --git a/sys/geom/part/g_part_gpt.c b/sys/geom/part/g_part_gpt.c index ee319cf1f496..3aa858d25ff6 100644 --- a/sys/geom/part/g_part_gpt.c +++ b/sys/geom/part/g_part_gpt.c @@ -94,7 +94,7 @@ static int g_part_gpt_destroy(struct g_part_table *, struct g_part_parms *); static void g_part_gpt_dumpconf(struct g_part_table *, struct g_part_entry *, struct sbuf *, const char *); static int g_part_gpt_dumpto(struct g_part_table *, struct g_part_entry *); -static int g_part_gpt_modify(struct g_part_table *, struct g_part_entry *, +static int g_part_gpt_modify(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static const char *g_part_gpt_name(struct g_part_table *, struct g_part_entry *, char *, size_t); @@ -107,6 +107,7 @@ static const char *g_part_gpt_type(struct g_part_table *, struct g_part_entry *, static int g_part_gpt_write(struct g_part_table *, struct g_consumer *); static int g_part_gpt_resize(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); +static int g_part_gpt_recover(struct g_part_table *); static kobj_method_t g_part_gpt_methods[] = { KOBJMETHOD(g_part_add, g_part_gpt_add), @@ -120,6 +121,7 @@ static kobj_method_t g_part_gpt_methods[] = { KOBJMETHOD(g_part_name, g_part_gpt_name), KOBJMETHOD(g_part_probe, g_part_gpt_probe), KOBJMETHOD(g_part_read, g_part_gpt_read), + KOBJMETHOD(g_part_recover, g_part_gpt_recover), KOBJMETHOD(g_part_setunset, g_part_gpt_setunset), KOBJMETHOD(g_part_type, g_part_gpt_type), 
KOBJMETHOD(g_part_write, g_part_gpt_write), @@ -170,7 +172,7 @@ static struct uuid gpt_uuid_unused = GPT_ENT_TYPE_UNUSED; static struct g_part_uuid_alias { struct uuid *uuid; - int alias; + int alias; } gpt_uuid_alias_match[] = { { &gpt_uuid_apple_boot, G_PART_ALIAS_APPLE_BOOT }, { &gpt_uuid_apple_hfs, G_PART_ALIAS_APPLE_HFS }, @@ -217,8 +219,16 @@ gpt_read_hdr(struct g_part_gpt_table *table, struct g_consumer *cp, pp = cp->provider; last = (pp->mediasize / pp->sectorsize) - 1; - table->lba[elt] = (elt == GPT_ELT_PRIHDR) ? 1 : last; table->state[elt] = GPT_STATE_MISSING; + /* + * If the primary header is valid look for secondary + * header in AlternateLBA, otherwise in the last medium's LBA. + */ + if (elt == GPT_ELT_SECHDR) { + if (table->state[GPT_ELT_PRIHDR] != GPT_STATE_OK) + table->lba[elt] = last; + } else + table->lba[elt] = 1; buf = g_read_data(cp, table->lba[elt] * pp->sectorsize, pp->sectorsize, &error); if (buf == NULL) @@ -244,12 +254,15 @@ gpt_read_hdr(struct g_part_gpt_table *table, struct g_consumer *cp, table->state[elt] = GPT_STATE_INVALID; hdr->hdr_revision = le32toh(buf->hdr_revision); - if (hdr->hdr_revision < 0x00010000) + if (hdr->hdr_revision < GPT_HDR_REVISION) goto fail; hdr->hdr_lba_self = le64toh(buf->hdr_lba_self); if (hdr->hdr_lba_self != table->lba[elt]) goto fail; hdr->hdr_lba_alt = le64toh(buf->hdr_lba_alt); + if (hdr->hdr_lba_alt == hdr->hdr_lba_self || + hdr->hdr_lba_alt > last) + goto fail; /* Check the managed area. 
*/ hdr->hdr_lba_start = le64toh(buf->hdr_lba_start); @@ -283,6 +296,10 @@ gpt_read_hdr(struct g_part_gpt_table *table, struct g_consumer *cp, le_uuid_dec(&buf->hdr_uuid, &hdr->hdr_uuid); hdr->hdr_crc_table = le32toh(buf->hdr_crc_table); + /* save LBA for secondary header */ + if (elt == GPT_ELT_PRIHDR) + table->lba[GPT_ELT_SECHDR] = hdr->hdr_lba_alt; + g_free(buf); return (hdr); @@ -490,18 +507,21 @@ static int g_part_gpt_destroy(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_part_gpt_table *table; + struct g_provider *pp; table = (struct g_part_gpt_table *)basetable; - if (table->hdr != NULL) - g_free(table->hdr); + pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider; + g_free(table->hdr); table->hdr = NULL; /* - * Wipe the first 2 sectors as well as the last to clear the - * partitioning. + * Wipe the first 2 sectors to clear the partitioning. Wipe the last + * sector only if it has valid secondary header. */ basetable->gpt_smhead |= 3; - basetable->gpt_smtail |= 1; + if (table->state[GPT_ELT_SECHDR] == GPT_STATE_OK && + table->lba[GPT_ELT_SECHDR] == pp->mediasize / pp->sectorsize - 1) + basetable->gpt_smtail |= 1; return (0); } @@ -665,10 +685,12 @@ g_part_gpt_read(struct g_part_table *basetable, struct g_consumer *cp) struct g_part_gpt_table *table; struct g_part_gpt_entry *entry; u_char *buf; + uint64_t last; int error, index; table = (struct g_part_gpt_table *)basetable; pp = cp->provider; + last = (pp->mediasize / pp->sectorsize) - 1; /* Read the PMBR */ buf = g_read_data(cp, 0, pp->sectorsize, &error); @@ -732,6 +754,7 @@ g_part_gpt_read(struct g_part_table *basetable, struct g_consumer *cp) printf("GEOM: %s: using the secondary instead -- recovery " "strongly advised.\n", pp->name); table->hdr = sechdr; + basetable->gpt_corrupt = 1; if (prihdr != NULL) g_free(prihdr); tbl = sectbl; @@ -743,6 +766,11 @@ g_part_gpt_read(struct g_part_table *basetable, struct g_consumer *cp) "or invalid.\n", pp->name); printf("GEOM: %s: using the 
primary only -- recovery " "suggested.\n", pp->name); + basetable->gpt_corrupt = 1; + } else if (table->lba[GPT_ELT_SECHDR] != last) { + printf( "GEOM: %s: the secondary GPT header is not in " + "the last LBA.\n", pp->name); + basetable->gpt_corrupt = 1; } table->hdr = prihdr; if (sechdr != NULL) @@ -759,8 +787,9 @@ g_part_gpt_read(struct g_part_table *basetable, struct g_consumer *cp) for (index = basetable->gpt_entries - 1; index >= 0; index--) { if (EQUUID(&tbl[index].ent_type, &gpt_uuid_unused)) continue; - entry = (struct g_part_gpt_entry *)g_part_new_entry(basetable, - index+1, tbl[index].ent_lba_start, tbl[index].ent_lba_end); + entry = (struct g_part_gpt_entry *)g_part_new_entry( + basetable, index + 1, tbl[index].ent_lba_start, + tbl[index].ent_lba_end); entry->ent = tbl[index]; } @@ -768,6 +797,38 @@ g_part_gpt_read(struct g_part_table *basetable, struct g_consumer *cp) return (0); } +static int +g_part_gpt_recover(struct g_part_table *basetable) +{ + struct g_part_gpt_table *table; + struct g_provider *pp; + uint64_t last; + size_t tblsz; + + table = (struct g_part_gpt_table *)basetable; + pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider; + last = pp->mediasize / pp->sectorsize - 1; + tblsz = (table->hdr->hdr_entries * table->hdr->hdr_entsz + + pp->sectorsize - 1) / pp->sectorsize; + + table->lba[GPT_ELT_PRIHDR] = 1; + table->lba[GPT_ELT_PRITBL] = 2; + table->lba[GPT_ELT_SECHDR] = last; + table->lba[GPT_ELT_SECTBL] = last - tblsz; + table->state[GPT_ELT_PRIHDR] = GPT_STATE_OK; + table->state[GPT_ELT_PRITBL] = GPT_STATE_OK; + table->state[GPT_ELT_SECHDR] = GPT_STATE_OK; + table->state[GPT_ELT_SECTBL] = GPT_STATE_OK; + table->hdr->hdr_lba_start = 2 + tblsz; + table->hdr->hdr_lba_end = last - tblsz - 1; + + basetable->gpt_first = table->hdr->hdr_lba_start; + basetable->gpt_last = table->hdr->hdr_lba_end; + basetable->gpt_corrupt = 0; + + return (0); +} + static int g_part_gpt_setunset(struct g_part_table *table, struct g_part_entry *baseentry, const 
char *attrib, unsigned int set) @@ -867,13 +928,13 @@ g_part_gpt_write(struct g_part_table *basetable, struct g_consumer *cp) struct g_part_entry *baseentry; struct g_part_gpt_entry *entry; struct g_part_gpt_table *table; - size_t tlbsz; + size_t tblsz; uint32_t crc; int error, index; pp = cp->provider; table = (struct g_part_gpt_table *)basetable; - tlbsz = (table->hdr->hdr_entries * table->hdr->hdr_entsz + + tblsz = (table->hdr->hdr_entries * table->hdr->hdr_entsz + pp->sectorsize - 1) / pp->sectorsize; /* Write the PMBR */ @@ -885,7 +946,7 @@ g_part_gpt_write(struct g_part_table *basetable, struct g_consumer *cp) return (error); /* Allocate space for the header and entries. */ - buf = g_malloc((tlbsz + 1) * pp->sectorsize, M_WAITOK | M_ZERO); + buf = g_malloc((tblsz + 1) * pp->sectorsize, M_WAITOK | M_ZERO); memcpy(buf, table->hdr->hdr_sig, sizeof(table->hdr->hdr_sig)); le32enc(buf + 8, table->hdr->hdr_revision); @@ -924,7 +985,7 @@ g_part_gpt_write(struct g_part_table *basetable, struct g_consumer *cp) le32enc(buf + 16, crc); error = g_write_data(cp, table->lba[GPT_ELT_PRITBL] * pp->sectorsize, - buf + pp->sectorsize, tlbsz * pp->sectorsize); + buf + pp->sectorsize, tblsz * pp->sectorsize); if (error) goto out; error = g_write_data(cp, table->lba[GPT_ELT_PRIHDR] * pp->sectorsize, @@ -941,7 +1002,7 @@ g_part_gpt_write(struct g_part_table *basetable, struct g_consumer *cp) le32enc(buf + 16, crc); error = g_write_data(cp, table->lba[GPT_ELT_SECTBL] * pp->sectorsize, - buf + pp->sectorsize, tlbsz * pp->sectorsize); + buf + pp->sectorsize, tblsz * pp->sectorsize); if (error) goto out; error = g_write_data(cp, table->lba[GPT_ELT_SECHDR] * pp->sectorsize, diff --git a/sys/geom/part/g_part_if.m b/sys/geom/part/g_part_if.m index 04440fef1221..4152e87fb02a 100644 --- a/sys/geom/part/g_part_if.m +++ b/sys/geom/part/g_part_if.m @@ -65,6 +65,12 @@ CODE { { return (ENOSYS); } + + static int + default_recover(struct g_part_table *t __unused) + { + return (ENOSYS); + } }; # 
add() - scheme specific processing for the add verb. @@ -163,6 +169,11 @@ METHOD int read { struct g_consumer *cp; }; +# recover() - scheme specific processing for the recover verb. +METHOD int recover { + struct g_part_table *table; +} DEFAULT default_recover; + # setunset() - set or unset partition entry attributes. METHOD int setunset { struct g_part_table *table;