1
0
mirror of https://git.FreeBSD.org/src.git synced 2024-11-26 07:55:01 +00:00

bhyve ahci: Improve robustness of TRIM handling

The previous fix for a stack buffer leak in the ahci device model
actually broke the handling of TRIM as one of the checks it added
caused TRIM commands to never be completed.  This resulted in command
timeouts if a guest OS did a 'newfs -E' of an AHCI disk, for example.
Also, for the invalid case the previous check was handling, the device
model should be failing with an error rather than claiming success.

To resolve this, validate the length of a TRIM request and fail with
an error if it exceeds the maximum number of supported blocks
advertised via IDENTIFY.  In addition, if the PRDT does not provide
enough data, fail the command with an error rather than performing a
partial completion.

This is somewhat complicated by the implementation of TRIM in the ahci
device model.  A single TRIM request can specify multiple LBA ranges.
The device model handles this by dispatching blockif_delete() requests
one at a time.  When a blockif_delete() request completes, the device
model locates the TRIM buffer and searches for the next LBA range to
handle.  Previously, the device model would re-read the trim buffer
from guest memory each time.  However, this was subject to some
unpleasant races if the guest changed the PRDT entries or CFIS while a
command was in flight.  Instead, read the buffer of trim ranges once
and cache it across multiple internal blockif requests.

Reviewed by:	mav
Fixes:		71fa171c64 bhyve: Initialize stack buffer in pci_ahci
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D47224

(cherry picked from commit 8c8ebbb045)
(cherry picked from commit 3981cf1087)
This commit is contained in:
John Baldwin 2024-10-24 10:18:09 -04:00 committed by Ed Maste
parent 114ba4f2cd
commit 2be68ecff8

View File

@ -126,6 +126,7 @@ struct ahci_ioreq {
STAILQ_ENTRY(ahci_ioreq) io_flist;
TAILQ_ENTRY(ahci_ioreq) io_blist;
uint8_t *cfis;
uint8_t *dsm;
uint32_t len;
uint32_t done;
int slot;
@ -213,6 +214,8 @@ struct pci_ahci_softc {
};
#define ahci_ctx(sc) ((sc)->asc_pi->pi_vmctx)
static void ahci_handle_next_trim(struct ahci_port *p, int slot, uint8_t *cfis,
uint8_t *buf, uint32_t len, uint32_t done);
static void ahci_handle_port(struct ahci_port *p);
static inline void lba_to_msf(uint8_t *buf, int lba)
@ -812,18 +815,14 @@ read_prdt(struct ahci_port *p, int slot, uint8_t *cfis, void *buf,
}
static void
ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis)
{
struct ahci_ioreq *aior;
struct blockif_req *breq;
uint8_t *entry;
uint64_t elba;
uint32_t len, elen;
int err, first, ncq;
uint8_t buf[512];
unsigned int written;
uint32_t len;
int ncq;
uint8_t *buf;
unsigned int nread;
first = (done == 0);
buf = NULL;
if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
len = (uint16_t)cfis[13] << 8 | cfis[12];
len *= 512;
@ -833,39 +832,84 @@ ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done
len *= 512;
ncq = 1;
}
written = read_prdt(p, slot, cfis, buf, sizeof(buf));
memset(buf + written, 0, sizeof(buf) - written);
next:
if (done >= sizeof(buf) - 8)
return;
entry = &buf[done];
elba = ((uint64_t)entry[5] << 40) |
((uint64_t)entry[4] << 32) |
((uint64_t)entry[3] << 24) |
((uint64_t)entry[2] << 16) |
((uint64_t)entry[1] << 8) |
entry[0];
elen = (uint16_t)entry[7] << 8 | entry[6];
done += 8;
if (elen == 0) {
if (done >= len) {
if (ncq) {
if (first)
ahci_write_fis_d2h_ncq(p, slot);
ahci_write_fis_sdb(p, slot, cfis,
ATA_S_READY | ATA_S_DSC);
} else {
ahci_write_fis_d2h(p, slot, cfis,
ATA_S_READY | ATA_S_DSC);
}
/* Support for only a single block is advertised via IDENTIFY. */
if (len > 512) {
goto invalid_command;
}
buf = malloc(len);
nread = read_prdt(p, slot, cfis, buf, len);
if (nread != len) {
goto invalid_command;
}
ahci_handle_next_trim(p, slot, cfis, buf, len, 0);
return;
invalid_command:
free(buf);
if (ncq) {
ahci_write_fis_d2h_ncq(p, slot);
ahci_write_fis_sdb(p, slot, cfis,
(ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
} else {
ahci_write_fis_d2h(p, slot, cfis,
(ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
}
}
static void
ahci_handle_next_trim(struct ahci_port *p, int slot, uint8_t *cfis,
uint8_t *buf, uint32_t len, uint32_t done)
{
struct ahci_ioreq *aior;
struct blockif_req *breq;
uint8_t *entry;
uint64_t elba;
uint32_t elen;
int err;
bool first, ncq;
first = (done == 0);
if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
ncq = false;
} else { /* ATA_SEND_FPDMA_QUEUED */
ncq = true;
}
/* Find the next range to TRIM. */
while (done < len) {
entry = &buf[done];
elba = ((uint64_t)entry[5] << 40) |
((uint64_t)entry[4] << 32) |
((uint64_t)entry[3] << 24) |
((uint64_t)entry[2] << 16) |
((uint64_t)entry[1] << 8) |
entry[0];
elen = (uint16_t)entry[7] << 8 | entry[6];
done += 8;
if (elen != 0)
break;
}
/* All remaining ranges were empty. */
if (done == len) {
free(buf);
if (ncq) {
if (first)
ahci_write_fis_d2h_ncq(p, slot);
ahci_write_fis_sdb(p, slot, cfis,
ATA_S_READY | ATA_S_DSC);
} else {
ahci_write_fis_d2h(p, slot, cfis,
ATA_S_READY | ATA_S_DSC);
}
if (!first) {
p->pending &= ~(1 << slot);
ahci_check_stopped(p);
if (!first)
ahci_handle_port(p);
return;
ahci_handle_port(p);
}
goto next;
return;
}
/*
@ -878,6 +922,7 @@ ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done
aior->slot = slot;
aior->len = len;
aior->done = done;
aior->dsm = buf;
aior->more = (len != done);
breq = &aior->io_req;
@ -1755,7 +1800,7 @@ ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
case ATA_DATA_SET_MANAGEMENT:
if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
cfis[13] == 0 && cfis[12] == 1) {
ahci_handle_dsm_trim(p, slot, cfis, 0);
ahci_handle_dsm_trim(p, slot, cfis);
break;
}
ahci_write_fis_d2h(p, slot, cfis,
@ -1765,7 +1810,7 @@ ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM &&
cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM &&
cfis[11] == 0 && cfis[3] == 1) {
ahci_handle_dsm_trim(p, slot, cfis, 0);
ahci_handle_dsm_trim(p, slot, cfis);
break;
}
ahci_write_fis_d2h(p, slot, cfis,
@ -1903,12 +1948,12 @@ ata_ioreq_cb(struct blockif_req *br, int err)
struct ahci_port *p;
struct pci_ahci_softc *sc;
uint32_t tfd;
uint8_t *cfis;
int slot, ncq, dsm;
uint8_t *cfis, *dsm;
int slot, ncq;
DPRINTF("%s %d", __func__, err);
ncq = dsm = 0;
ncq = 0;
aior = br->br_param;
p = aior->io_pr;
cfis = aior->cfis;
@ -1920,10 +1965,8 @@ ata_ioreq_cb(struct blockif_req *br, int err)
cfis[2] == ATA_READ_FPDMA_QUEUED ||
cfis[2] == ATA_SEND_FPDMA_QUEUED)
ncq = 1;
if (cfis[2] == ATA_DATA_SET_MANAGEMENT ||
(cfis[2] == ATA_SEND_FPDMA_QUEUED &&
(cfis[13] & 0x1f) == ATA_SFPDMA_DSM))
dsm = 1;
dsm = aior->dsm;
aior->dsm = NULL;
pthread_mutex_lock(&sc->mtx);
@ -1941,8 +1984,9 @@ ata_ioreq_cb(struct blockif_req *br, int err)
hdr->prdbc = aior->done;
if (!err && aior->more) {
if (dsm)
ahci_handle_dsm_trim(p, slot, cfis, aior->done);
if (dsm != NULL)
ahci_handle_next_trim(p, slot, cfis, dsm,
aior->len, aior->done);
else
ahci_handle_rw(p, slot, cfis, aior->done);
goto out;
@ -1964,6 +2008,7 @@ ata_ioreq_cb(struct blockif_req *br, int err)
ahci_check_stopped(p);
ahci_handle_port(p);
free(dsm);
out:
pthread_mutex_unlock(&sc->mtx);
DPRINTF("%s exit", __func__);