From 90bcc81bc35e89049a94a12c259b0c53fcf7a299 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Thu, 14 Jul 2022 15:38:14 -0400 Subject: [PATCH] Delay GEOM disk_create() until CAM periph probe completes. Before this patch CAM periph drivers called both disk_alloc() and disk_create() same time on periph creation. But then prevented disks from opening until the periph probe completion with cam_periph_hold(). As result, especially if disk misbehaves during the probe, GEOM event thread, triggered to taste the disk, got blocked on open attempt, potentially for a long time, unable to process other events. This patch moves disk_create() call from periph creation to the end of the probe. To allow disk_create() calls from non-sleepable CAM contexts some of its duties requiring memory allocations are moved either back to disk_alloc() or forward to g_disk_create(), so now disk_alloc() and disk_add_alias() are the only disk methods that require sleeping. If disk fails during the probe disk_create() may just be skipped, going directly to disk_destroy(). Other method calls during that time are just ignored. Since GEOM may now see the disks after CAM bus scan is already completed, introduce per-periph boot hold functions. Enclosure driver already had such mechanism, so just generalize it. Reviewed by: imp MFC after: 1 month Sponsored by: iXsystems, Inc. Differential Revision: https://reviews.freebsd.org/D35784 --- sys/cam/ata/ata_da.c | 36 +++++++------- sys/cam/cam_periph.c | 14 ++++++ sys/cam/cam_periph.h | 3 ++ sys/cam/nvme/nvme_da.c | 24 ++++----- sys/cam/scsi/scsi_cd.c | 34 ++++++------- sys/cam/scsi/scsi_da.c | 51 +++++++------------ sys/cam/scsi/scsi_enc.c | 9 ++-- sys/cam/scsi/scsi_enc_internal.h | 2 - sys/geom/geom_disk.c | 85 +++++++++++++++++++------------- sys/geom/geom_disk.h | 4 +- 10 files changed, 134 insertions(+), 128 deletions(-) diff --git a/sys/cam/ata/ata_da.c b/sys/cam/ata/ata_da.c index ee6ea0b18902..c56e6f83aae9 100644 --- a/sys/cam/ata/ata_da.c +++ b/sys/cam/ata/ata_da.c @@ -1845,9 +1845,11 @@ adaregister(struct cam_periph *periph, void *arg) TASK_INIT(&softc->sysctl_task, 0, adasysctlinit, periph); /* - * Register this media as a disk + * Take a reference on the periph while adastart is called to finish + * the probe. The reference will be dropped in adaprobedone at the + * end of probe. */ - (void)cam_periph_hold(periph, PRIBIO); + (void)cam_periph_acquire(periph); cam_periph_unlock(periph); snprintf(announce_buf, ADA_ANNOUNCETMP_SZ, "kern.cam.ada.%d.quirks", periph->unit_number); @@ -1905,19 +1907,6 @@ adaregister(struct cam_periph *periph, void *arg) softc->disk->d_name = "ada"; softc->disk->d_drv1 = periph; softc->disk->d_unit = periph->unit_number; - - /* - * Acquire a reference to the periph before we register with GEOM. - * We'll release this reference once GEOM calls us back (via - * adadiskgonecb()) telling us that our provider has been freed. - */ - if (cam_periph_acquire(periph) != 0) { - xpt_print(periph->path, "%s: lost periph during " - "registration!\n", __func__); - cam_periph_lock(periph); - return (CAM_REQ_CMP_ERR); - } - disk_create(softc->disk, DISK_VERSION); cam_periph_lock(periph); dp = &softc->params; @@ -1960,6 +1949,9 @@ adaregister(struct cam_periph *periph, void *arg) SBT_1S / ADA_ORDEREDTAG_INTERVAL * ada_default_timeout, 0, adasendorderedtag, softc, C_PREL(1)); + /* Released after probe when disk_create() call pass it to GEOM. */ + cam_periph_hold_boot(periph); + if (ADA_RA >= 0 && softc->flags & ADA_FLAG_CAN_RAHEAD) { softc->state = ADA_STATE_RAHEAD; } else if (ADA_WC >= 0 && softc->flags & ADA_FLAG_CAN_WCACHE) { @@ -1977,7 +1969,6 @@ adaregister(struct cam_periph *periph, void *arg) } xpt_schedule(periph, CAM_PRIORITY_DEV); - return(CAM_REQ_CMP); } @@ -2705,10 +2696,17 @@ adaprobedone(struct cam_periph *periph, union ccb *ccb) adaschedule(periph); if ((softc->flags & ADA_FLAG_ANNOUNCED) == 0) { softc->flags |= ADA_FLAG_ANNOUNCED; - cam_periph_unhold(periph); - } else { - cam_periph_release_locked(periph); + + /* + * We'll release this reference once GEOM calls us back via + * adadiskgonecb(), telling us that our provider has been freed. + */ + if (cam_periph_acquire(periph) == 0) + disk_create(softc->disk, DISK_VERSION); + + cam_periph_release_boot(periph); } + cam_periph_release_locked(periph); } static void diff --git a/sys/cam/cam_periph.c b/sys/cam/cam_periph.c index 8c7c6b53b473..1d49bf277ba1 100644 --- a/sys/cam/cam_periph.c +++ b/sys/cam/cam_periph.c @@ -530,6 +530,20 @@ cam_periph_unhold(struct cam_periph *periph) cam_periph_release_locked(periph); } +void +cam_periph_hold_boot(struct cam_periph *periph) +{ + + root_mount_hold_token(periph->periph_name, &periph->periph_rootmount); +} + +void +cam_periph_release_boot(struct cam_periph *periph) +{ + + root_mount_rel(&periph->periph_rootmount); +} + /* * Look for the next unit number that is not currently in use for this * peripheral type starting at "newunit". Also exclude unit numbers that diff --git a/sys/cam/cam_periph.h b/sys/cam/cam_periph.h index 9c323394797c..5500720a95b9 100644 --- a/sys/cam/cam_periph.h +++ b/sys/cam/cam_periph.h @@ -150,6 +150,7 @@ struct cam_periph { ac_code deferred_ac; struct task periph_run_task; uma_zone_t ccb_zone; + struct root_hold_token periph_rootmount; }; #define CAM_PERIPH_MAXMAPS 2 @@ -175,6 +176,8 @@ void cam_periph_release_locked(struct cam_periph *periph); void cam_periph_release_locked_buses(struct cam_periph *periph); int cam_periph_hold(struct cam_periph *periph, int priority); void cam_periph_unhold(struct cam_periph *periph); +void cam_periph_hold_boot(struct cam_periph *periph); +void cam_periph_release_boot(struct cam_periph *periph); void cam_periph_invalidate(struct cam_periph *periph); int cam_periph_mapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo, diff --git a/sys/cam/nvme/nvme_da.c b/sys/cam/nvme/nvme_da.c index 1175a0e99b3c..4a50f103ddb5 100644 --- a/sys/cam/nvme/nvme_da.c +++ b/sys/cam/nvme/nvme_da.c @@ -886,7 +886,7 @@ ndaregister(struct cam_periph *periph, void *arg) /* * Register this media as a disk */ - (void)cam_periph_hold(periph, PRIBIO); + (void)cam_periph_acquire(periph); cam_periph_unlock(periph); snprintf(announce_buf, sizeof(announce_buf), "kern.cam.nda.%d.quirks", periph->unit_number); @@ -964,20 +964,7 @@ ndaregister(struct cam_periph *periph, void *arg) if (nda_nvd_compat) disk_add_alias(disk, "nvd"); - /* - * Acquire a reference to the periph before we register with GEOM. - * We'll release this reference once GEOM calls us back (via - * ndadiskgonecb()) telling us that our provider has been freed. - */ - if (cam_periph_acquire(periph) != 0) { - xpt_print(periph->path, "%s: lost periph during " - "registration!\n", __func__); - cam_periph_lock(periph); - return (CAM_REQ_CMP_ERR); - } - disk_create(softc->disk, DISK_VERSION); cam_periph_lock(periph); - cam_periph_unhold(periph); snprintf(announce_buf, sizeof(announce_buf), "%juMB (%ju %u byte sectors)", @@ -1002,6 +989,15 @@ ndaregister(struct cam_periph *periph, void *arg) ndaasync, periph, periph->path); softc->state = NDA_STATE_NORMAL; + + /* + * We'll release this reference once GEOM calls us back via + * ndadiskgonecb(), telling us that our provider has been freed. + */ + if (cam_periph_acquire(periph) == 0) + disk_create(softc->disk, DISK_VERSION); + + cam_periph_release_locked(periph); return(CAM_REQ_CMP); } diff --git a/sys/cam/scsi/scsi_cd.c b/sys/cam/scsi/scsi_cd.c index 1618aa3fb500..81eda857fba8 100644 --- a/sys/cam/scsi/scsi_cd.c +++ b/sys/cam/scsi/scsi_cd.c @@ -650,10 +650,10 @@ cdregister(struct cam_periph *periph, void *arg) softc->minimum_command_size = 6; /* - * Refcount and block open attempts until we are setup - * Can't block + * Take a reference on the periph while cdstart is called to finish the + * probe. The reference will be dropped in cddone at the end of probe. */ - (void)cam_periph_hold(periph, PRIBIO); + (void)cam_periph_acquire(periph); cam_periph_unlock(periph); /* * Load the user's default, if any. @@ -714,20 +714,6 @@ cdregister(struct cam_periph *periph, void *arg) softc->disk->d_hba_subdevice = cpi.hba_subdevice; snprintf(softc->disk->d_attachment, sizeof(softc->disk->d_attachment), "%s%d", cpi.dev_name, cpi.unit_number); - - /* - * Acquire a reference to the periph before we register with GEOM. - * We'll release this reference once GEOM calls us back (via - * dadiskgonecb()) telling us that our provider has been freed. - */ - if (cam_periph_acquire(periph) != 0) { - xpt_print(periph->path, "%s: lost periph during " - "registration!\n", __func__); - cam_periph_lock(periph); - return (CAM_REQ_CMP_ERR); - } - - disk_create(softc->disk, DISK_VERSION); cam_periph_lock(periph); /* @@ -748,6 +734,9 @@ cdregister(struct cam_periph *periph, void *arg) 0, cdmediapoll, periph, C_PREL(1)); } + /* Released after probe when disk_create() call pass it to GEOM. */ + cam_periph_hold_boot(periph); + xpt_schedule(periph, CAM_PRIORITY_DEV); return(CAM_REQ_CMP); } @@ -1385,7 +1374,16 @@ cddone(struct cam_periph *periph, union ccb *done_ccb) * operation. */ xpt_release_ccb(done_ccb); - cam_periph_unhold(periph); + + /* + * We'll release this reference once GEOM calls us back via + * cddiskgonecb(), telling us that our provider has been freed. + */ + if (cam_periph_acquire(periph) == 0) + disk_create(softc->disk, DISK_VERSION); + + cam_periph_release_boot(periph); + cam_periph_release_locked(periph); return; } case CD_CCB_TUR: diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c index 4c298af3432d..a85a818b1e3f 100644 --- a/sys/cam/scsi/scsi_da.c +++ b/sys/cam/scsi/scsi_da.c @@ -329,7 +329,6 @@ typedef enum { DA_REF_OPEN = 1, DA_REF_OPEN_HOLD, DA_REF_CLOSE_HOLD, - DA_REF_PROBE_HOLD, DA_REF_TUR, DA_REF_GEOM, DA_REF_SYSCTL, @@ -2602,9 +2601,17 @@ daprobedone(struct cam_periph *periph, union ccb *ccb) wakeup(&softc->disk->d_mediasize); if ((softc->flags & DA_FLAG_ANNOUNCED) == 0) { softc->flags |= DA_FLAG_ANNOUNCED; - da_periph_unhold(periph, DA_REF_PROBE_HOLD); - } else - da_periph_release_locked(periph, DA_REF_REPROBE); + + /* + * We'll release this reference once GEOM calls us back via + * dadiskgonecb(), telling us that our provider has been freed. + */ + if (da_periph_acquire(periph, DA_REF_GEOM) == 0) + disk_create(softc->disk, DISK_VERSION); + + cam_periph_release_boot(periph); + } + da_periph_release_locked(periph, DA_REF_REPROBE); } static void @@ -2864,14 +2871,10 @@ daregister(struct cam_periph *periph, void *arg) } /* - * Take an exclusive section lock on the periph while dastart is called - * to finish the probe. The lock will be dropped in dadone at the end - * of probe. This locks out daopen and daclose from racing with the - * probe. - * - * XXX if cam_periph_hold returns an error, we don't hold a refcount. + * Take a reference on the periph while dastart is called to finish the + * probe. The reference will be dropped in dadone at the end of probe. */ - (void)da_periph_hold(periph, PRIBIO, DA_REF_PROBE_HOLD); + (void)da_periph_acquire(periph, DA_REF_REPROBE); /* * Schedule a periodic event to occasionally send an @@ -2967,20 +2970,6 @@ daregister(struct cam_periph *periph, void *arg) softc->disk->d_hba_subdevice = cpi.hba_subdevice; snprintf(softc->disk->d_attachment, sizeof(softc->disk->d_attachment), "%s%d", cpi.dev_name, cpi.unit_number); - - /* - * Acquire a reference to the periph before we register with GEOM. - * We'll release this reference once GEOM calls us back (via - * dadiskgonecb()) telling us that our provider has been freed. - */ - if (da_periph_acquire(periph, DA_REF_GEOM) != 0) { - xpt_print(periph->path, "%s: lost periph during " - "registration!\n", __func__); - cam_periph_lock(periph); - return (CAM_REQ_CMP_ERR); - } - - disk_create(softc->disk, DISK_VERSION); cam_periph_lock(periph); /* @@ -2994,14 +2983,6 @@ daregister(struct cam_periph *periph, void *arg) AC_ADVINFO_CHANGED | AC_SCSI_AEN | AC_UNIT_ATTENTION | AC_INQ_CHANGED, daasync, periph, periph->path); - /* - * Emit an attribute changed notification just in case - * physical path information arrived before our async - * event handler was registered, but after anyone attaching - * to our disk device polled it. - */ - disk_attr_changed(softc->disk, "GEOM::physpath", M_NOWAIT); - /* * Schedule a periodic media polling events. */ @@ -3013,8 +2994,10 @@ daregister(struct cam_periph *periph, void *arg) 0, damediapoll, periph, C_PREL(1)); } - xpt_schedule(periph, CAM_PRIORITY_DEV); + /* Released after probe when disk_create() call pass it to GEOM. */ + cam_periph_hold_boot(periph); + xpt_schedule(periph, CAM_PRIORITY_DEV); return(CAM_REQ_CMP); } diff --git a/sys/cam/scsi/scsi_enc.c b/sys/cam/scsi/scsi_enc.c index b8afdf938a3a..d823bc24101d 100644 --- a/sys/cam/scsi/scsi_enc.c +++ b/sys/cam/scsi/scsi_enc.c @@ -205,7 +205,7 @@ enc_dtor(struct cam_periph *periph) if (enc->enc_vec.softc_cleanup != NULL) enc->enc_vec.softc_cleanup(enc); - root_mount_rel(&enc->enc_rootmount); + cam_periph_release_boot(periph); ENC_FREE(enc); } @@ -842,7 +842,7 @@ enc_daemon(void *arg) * We've been through our state machine at least * once. Allow the transition to userland. */ - root_mount_rel(&enc->enc_rootmount); + cam_periph_release_boot(enc->periph); callout_reset_sbt(&enc->status_updater, 60 * SBT_1S, 0, enc_status_updater, enc, C_PREL(1)); @@ -937,9 +937,8 @@ enc_ctor(struct cam_periph *periph, void *arg) * through our state machine so that physical path data is * present. */ - if (enc->enc_vec.poll_status != NULL) { - root_mount_hold_token(periph->periph_name, &enc->enc_rootmount); - } + if (enc->enc_vec.poll_status != NULL) + cam_periph_hold_boot(periph); /* * The softc field is set only once the enc is fully initialized diff --git a/sys/cam/scsi/scsi_enc_internal.h b/sys/cam/scsi/scsi_enc_internal.h index e57634d065df..2840b0617e68 100644 --- a/sys/cam/scsi/scsi_enc_internal.h +++ b/sys/cam/scsi/scsi_enc_internal.h @@ -161,8 +161,6 @@ struct enc_softc { struct enc_fsm_state *enc_fsm_states; - struct root_hold_token enc_rootmount; - #define ENC_ANNOUNCE_SZ 400 char announce_buf[ENC_ANNOUNCE_SZ]; }; diff --git a/sys/geom/geom_disk.c b/sys/geom/geom_disk.c index c062b0e777ac..90fe0216dd8b 100644 --- a/sys/geom/geom_disk.c +++ b/sys/geom/geom_disk.c @@ -721,6 +721,11 @@ g_disk_create(void *arg, int flag) sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); mtx_init(&sc->done_mtx, "g_disk_done", NULL, MTX_DEF); sc->dp = dp; + if (dp->d_devstat == NULL) { + dp->d_devstat = devstat_new_entry(dp->d_name, dp->d_unit, + dp->d_sectorsize, DEVSTAT_ALL_SUPPORTED, + DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); + } sc->d_devstat = dp->d_devstat; gp = g_new_geomf(&g_disk_class, "%s%d", dp->d_name, dp->d_unit); gp->softc = sc; @@ -862,6 +867,9 @@ disk_alloc(void) dp = g_malloc(sizeof(struct disk), M_WAITOK | M_ZERO); LIST_INIT(&dp->d_aliases); + dp->d_init_level = DISK_INIT_NONE; + dp->d_cevent = g_alloc_event(M_WAITOK); + dp->d_devent = g_alloc_event(M_WAITOK); return (dp); } @@ -888,31 +896,40 @@ disk_create(struct disk *dp, int version) KASSERT(dp->d_name != NULL, ("disk_create need d_name")); KASSERT(*dp->d_name != 0, ("disk_create need d_name")); KASSERT(strlen(dp->d_name) < SPECNAMELEN - 4, ("disk name too long")); - if (dp->d_devstat == NULL) - dp->d_devstat = devstat_new_entry(dp->d_name, dp->d_unit, - dp->d_sectorsize, DEVSTAT_ALL_SUPPORTED, - DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); - dp->d_geom = NULL; - dp->d_event = g_alloc_event(M_WAITOK); - - dp->d_init_level = DISK_INIT_NONE; - g_disk_ident_adjust(dp->d_ident, sizeof(dp->d_ident)); - g_post_event(g_disk_create, dp, M_WAITOK, dp, NULL); + + dp->d_init_level = DISK_INIT_CREATE; + + KASSERT(dp->d_cevent != NULL, + ("Disk create for %p with event NULL", dp)); + g_post_event_ep(g_disk_create, dp, dp->d_cevent, dp, NULL); } void disk_destroy(struct disk *dp) { + struct disk_alias *dap, *daptmp; - KASSERT(dp->d_event != NULL, + /* If disk_create() was never called, just free the resources. */ + if (dp->d_init_level < DISK_INIT_CREATE) { + if (dp->d_devstat != NULL) + devstat_remove_entry(dp->d_devstat); + LIST_FOREACH_SAFE(dap, &dp->d_aliases, da_next, daptmp) + g_free(dap); + g_free(dp->d_cevent); + g_free(dp->d_devent); + g_free(dp); + return; + } + + KASSERT(dp->d_devent != NULL, ("Disk destroy for %p with event NULL", dp)); disk_gone(dp); dp->d_destroyed = 1; g_cancel_event(dp); if (dp->d_devstat != NULL) devstat_remove_entry(dp->d_devstat); - g_post_event_ep(g_disk_destroy, dp, dp->d_event, NULL); + g_post_event_ep(g_disk_destroy, dp, dp->d_devent, NULL); } void @@ -979,14 +996,14 @@ disk_gone(struct disk *dp) void disk_attr_changed(struct disk *dp, const char *attr, int flag) { - struct g_geom *gp; + struct g_geom *gp = dp->d_geom; struct g_provider *pp; char devnamebuf[128]; - gp = dp->d_geom; - if (gp != NULL) - LIST_FOREACH(pp, &gp->provider, provider) - (void)g_attr_changed(pp, attr, flag); + if (gp == NULL) + return; + LIST_FOREACH(pp, &gp->provider, provider) + (void)g_attr_changed(pp, attr, flag); snprintf(devnamebuf, sizeof(devnamebuf), "devname=%s%d", dp->d_name, dp->d_unit); devctl_notify("GEOM", "disk", attr, devnamebuf); @@ -995,34 +1012,32 @@ disk_attr_changed(struct disk *dp, const char *attr, int flag) void disk_media_changed(struct disk *dp, int flag) { - struct g_geom *gp; + struct g_geom *gp = dp->d_geom; struct g_provider *pp; - gp = dp->d_geom; - if (gp != NULL) { - pp = LIST_FIRST(&gp->provider); - if (pp != NULL) { - KASSERT(LIST_NEXT(pp, provider) == NULL, - ("geom %p has more than one provider", gp)); - g_media_changed(pp, flag); - } + if (gp == NULL) + return; + pp = LIST_FIRST(&gp->provider); + if (pp != NULL) { + KASSERT(LIST_NEXT(pp, provider) == NULL, + ("geom %p has more than one provider", gp)); + g_media_changed(pp, flag); } } void disk_media_gone(struct disk *dp, int flag) { - struct g_geom *gp; + struct g_geom *gp = dp->d_geom; struct g_provider *pp; - gp = dp->d_geom; - if (gp != NULL) { - pp = LIST_FIRST(&gp->provider); - if (pp != NULL) { - KASSERT(LIST_NEXT(pp, provider) == NULL, - ("geom %p has more than one provider", gp)); - g_media_gone(pp, flag); - } + if (gp == NULL) + return; + pp = LIST_FIRST(&gp->provider); + if (pp != NULL) { + KASSERT(LIST_NEXT(pp, provider) == NULL, + ("geom %p has more than one provider", gp)); + g_media_gone(pp, flag); } } diff --git a/sys/geom/geom_disk.h b/sys/geom/geom_disk.h index 10a14c9c1da7..0ce3a0a74fec 100644 --- a/sys/geom/geom_disk.h +++ b/sys/geom/geom_disk.h @@ -69,6 +69,7 @@ struct devstat; typedef enum { DISK_INIT_NONE, + DISK_INIT_CREATE, DISK_INIT_START, DISK_INIT_DONE } disk_init_level; @@ -125,7 +126,8 @@ struct disk { /* Fields private to geom_disk, to be moved on next version bump */ LIST_HEAD(,disk_alias) d_aliases; - struct g_event *d_event; + struct g_event *d_cevent; + struct g_event *d_devent; }; #define DISKFLAG_RESERVED 0x0001 /* Was NEEDSGIANT */