1
0
mirror of https://git.FreeBSD.org/src.git synced 2024-12-12 09:58:36 +00:00

Add BIO_DELETE support to ada(4):

- For SSDs use TRIM feature of DATA SET MANAGEMENT command, as defined by
ACS-2 specification working draft.
- For CompactFlash use CFA ERASE command, same as ad(4) does.

With this patch, `newfs -E /dev/ada1` was able to restore the write speed of
my heavily worn OCZ Vertex SSD (firmware 1.4) to its initial level
for most of its capacity. The previous 1.3 firmware, despite reporting
the TRIM capability bit set, did not work, returning an ABORT error for every
DSM command.

I have no idea whether it is normal, but for some reason it takes 200ms
to handle any TRIM command on this drive, which made deletes extremely
slow. However, a TRIM command can accept a long list of LBAs, and the length of
that list does not seem to affect its execution time. The implemented request
clustering algorithm allowed me to raise the delete rate to reasonable numbers
when many parallel DELETE requests are running.
This commit is contained in:
Alexander Motin 2009-12-28 20:08:01 +00:00
parent a4660d594f
commit 1c80ec0a6b
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=201139
4 changed files with 261 additions and 117 deletions

View File

@ -75,6 +75,11 @@ ata_op_string(struct ata_cmd *cmd)
switch (cmd->command) {
case 0x00: return ("NOP");
case 0x03: return ("CFA_REQUEST_EXTENDED_ERROR");
case 0x06:
switch (cmd->features) {
case 0x01: return ("DSM TRIM");
}
return "DSM";
case 0x08: return ("DEVICE_RESET");
case 0x20: return ("READ");
case 0x24: return ("READ48");
@ -338,7 +343,8 @@ ata_48bit_cmd(struct ccb_ataio *ataio, uint8_t cmd, uint16_t features,
cmd == ATA_WRITE_DMA_FUA48 ||
cmd == ATA_WRITE_DMA_QUEUED48 ||
cmd == ATA_WRITE_DMA_QUEUED_FUA48 ||
cmd == ATA_WRITE_STREAM_DMA48)
cmd == ATA_WRITE_STREAM_DMA48 ||
cmd == ATA_DATA_SET_MANAGEMENT)
ataio->cmd.flags |= CAM_ATAIO_DMA;
ataio->cmd.command = cmd;
ataio->cmd.features = features;

View File

@ -74,8 +74,10 @@ typedef enum {
ADA_FLAG_CAN_DMA = 0x010,
ADA_FLAG_NEED_OTAG = 0x020,
ADA_FLAG_WENT_IDLE = 0x040,
ADA_FLAG_CAN_TRIM = 0x080,
ADA_FLAG_OPEN = 0x100,
ADA_FLAG_SCTX_INIT = 0x200
ADA_FLAG_SCTX_INIT = 0x200,
ADA_FLAG_CAN_CFA = 0x400
} ada_flags;
typedef enum {
@ -86,6 +88,7 @@ typedef enum {
ADA_CCB_BUFFER_IO = 0x03,
ADA_CCB_WAITING = 0x04,
ADA_CCB_DUMP = 0x05,
ADA_CCB_TRIM = 0x06,
ADA_CCB_TYPE_MASK = 0x0F,
} ada_ccb_state;
@ -101,13 +104,23 @@ struct disk_params {
u_int64_t sectors; /* Total number sectors */
};
/*
 * Size limits for one clustered TRIM request.
 *
 * TRIM_MAX_BLOCKS is the largest payload, in 512-byte blocks, the driver
 * will build.  Each block carries 64 range entries of 8 bytes each, so
 * TRIM_MAX_RANGES is the matching upper bound on range entries.
 *
 * The expansion is parenthesized so the macro evaluates as a single value
 * in any expression context (e.g. "x / TRIM_MAX_RANGES").
 */
#define TRIM_MAX_BLOCKS 4
#define TRIM_MAX_RANGES (TRIM_MAX_BLOCKS * 64)
/*
 * State for one clustered TRIM command.
 *
 * The request is zeroed before it is filled, so unused entries of both
 * arrays are zero/NULL.
 */
struct trim_request {
/* Command payload: consecutive 8-byte entries, 6 bytes of LBA followed by
 * a 2-byte sector count, both stored least-significant byte first. */
uint8_t data[TRIM_MAX_RANGES * 8];
/* The bios folded into this command, in the order they were dequeued.
 * The completion path walks this array until the first NULL entry. */
struct bio *bps[TRIM_MAX_RANGES];
};
struct ada_softc {
struct bio_queue_head bio_queue;
struct bio_queue_head trim_queue;
ada_state state;
ada_flags flags;
ada_quirks quirks;
int ordered_tag_count;
int outstanding_cmds;
int trim_max_ranges;
int trim_running;
struct disk_params params;
struct disk *disk;
union ccb saved_ccb;
@ -115,6 +128,7 @@ struct ada_softc {
struct sysctl_ctx_list sysctl_ctx;
struct sysctl_oid *sysctl_tree;
struct callout sendordered_c;
struct trim_request trim_req;
};
struct ada_quirk_entry {
@ -309,6 +323,18 @@ adaclose(struct disk *dp)
return (0);
}
static void
adaschedule(struct cam_periph *periph)
{
struct ada_softc *softc = (struct ada_softc *)periph->softc;
if (bioq_first(&softc->bio_queue) ||
(!softc->trim_running && bioq_first(&softc->trim_queue))) {
/* Have more work to do, so ensure we stay scheduled */
xpt_schedule(periph, CAM_PRIORITY_NORMAL);
}
}
/*
* Actually translate the requested transfer into one the physical driver
* can understand. The transfer is described by a buf and will include
@ -341,12 +367,16 @@ adastrategy(struct bio *bp)
/*
* Place it in the queue of disk activities for this disk
*/
bioq_disksort(&softc->bio_queue, bp);
if (bp->bio_cmd == BIO_DELETE &&
(softc->flags & ADA_FLAG_CAN_TRIM))
bioq_disksort(&softc->trim_queue, bp);
else
bioq_disksort(&softc->bio_queue, bp);
/*
* Schedule ourselves for performing the work.
*/
xpt_schedule(periph, CAM_PRIORITY_NORMAL);
adaschedule(periph);
cam_periph_unlock(periph);
return;
@ -485,6 +515,7 @@ adaoninvalidate(struct cam_periph *periph)
* with XPT_ABORT_CCB.
*/
bioq_flush(&softc->bio_queue, NULL, ENXIO);
bioq_flush(&softc->trim_queue, NULL, ENXIO);
disk_gone(softc->disk);
xpt_print(periph->path, "lost device\n");
@ -618,6 +649,7 @@ adaregister(struct cam_periph *periph, void *arg)
}
bioq_init(&softc->bio_queue);
bioq_init(&softc->trim_queue);
if (cgd->ident_data.capabilities1 & ATA_SUPPORT_DMA)
softc->flags |= ADA_FLAG_CAN_DMA;
@ -628,6 +660,17 @@ adaregister(struct cam_periph *periph, void *arg)
if (cgd->ident_data.satacapabilities & ATA_SUPPORT_NCQ &&
cgd->inq_flags & SID_CmdQue)
softc->flags |= ADA_FLAG_CAN_NCQ;
if (cgd->ident_data.support_dsm & ATA_SUPPORT_DSM_TRIM) {
softc->flags |= ADA_FLAG_CAN_TRIM;
softc->trim_max_ranges = TRIM_MAX_RANGES;
if (cgd->ident_data.max_dsm_blocks != 0) {
softc->trim_max_ranges =
min(cgd->ident_data.max_dsm_blocks * 64,
softc->trim_max_ranges);
}
}
if (cgd->ident_data.support.command2 & ATA_SUPPORT_CFA)
softc->flags |= ADA_FLAG_CAN_CFA;
softc->state = ADA_STATE_NORMAL;
periph->softc = softc;
@ -672,7 +715,7 @@ adaregister(struct cam_periph *periph, void *arg)
maxio = DFLTPHYS; /* traditional default */
else if (maxio > MAXPHYS)
maxio = MAXPHYS; /* for safety */
if (cgd->ident_data.support.command2 & ATA_SUPPORT_ADDRESS48)
if (softc->flags & ADA_FLAG_CAN_48BIT)
maxio = min(maxio, 65536 * softc->params.secsize);
else /* 28bit ATA command limit */
maxio = min(maxio, 256 * softc->params.secsize);
@ -681,6 +724,10 @@ adaregister(struct cam_periph *periph, void *arg)
softc->disk->d_flags = 0;
if (softc->flags & ADA_FLAG_CAN_FLUSHCACHE)
softc->disk->d_flags |= DISKFLAG_CANFLUSHCACHE;
if ((softc->flags & ADA_FLAG_CAN_TRIM) ||
((softc->flags & ADA_FLAG_CAN_CFA) &&
!(softc->flags & ADA_FLAG_CAN_48BIT)))
softc->disk->d_flags |= DISKFLAG_CANDELETE;
strlcpy(softc->disk->d_ident, cgd->serial_num,
MIN(sizeof(softc->disk->d_ident), cgd->serial_num_len + 1));
@ -743,13 +790,10 @@ adastart(struct cam_periph *periph, union ccb *start_ccb)
switch (softc->state) {
case ADA_STATE_NORMAL:
{
/* Pull a buffer from the queue and get going on it */
struct bio *bp;
u_int8_t tag_code;
/*
* See if there is a buf with work for us to do..
*/
bp = bioq_first(&softc->bio_queue);
/* Execute immediate CCB if waiting. */
if (periph->immediate_priority <= periph->pinfo.priority) {
CAM_DEBUG_PRINT(CAM_DEBUG_SUBTRACE,
("queuing for immediate ccb\n"));
@ -758,115 +802,188 @@ adastart(struct cam_periph *periph, union ccb *start_ccb)
periph_links.sle);
periph->immediate_priority = CAM_PRIORITY_NONE;
wakeup(&periph->ccb_list);
} else if (bp == NULL) {
/* Have more work to do, so ensure we stay scheduled */
adaschedule(periph);
break;
}
/* Run TRIM if not running yet. */
if (!softc->trim_running &&
(bp = bioq_first(&softc->trim_queue)) != 0) {
struct trim_request *req = &softc->trim_req;
struct bio *bp1;
int bps = 0, ranges = 0;
softc->trim_running = 1;
bzero(req, sizeof(*req));
bp1 = bp;
do {
uint64_t lba = bp1->bio_pblkno;
int count = bp1->bio_bcount /
softc->params.secsize;
bioq_remove(&softc->trim_queue, bp1);
while (count > 0) {
int c = min(count, 0xffff);
int off = ranges * 8;
req->data[off + 0] = lba & 0xff;
req->data[off + 1] = (lba >> 8) & 0xff;
req->data[off + 2] = (lba >> 16) & 0xff;
req->data[off + 3] = (lba >> 24) & 0xff;
req->data[off + 4] = (lba >> 32) & 0xff;
req->data[off + 5] = (lba >> 40) & 0xff;
req->data[off + 6] = c & 0xff;
req->data[off + 7] = (c >> 8) & 0xff;
lba += c;
count -= c;
ranges++;
}
req->bps[bps++] = bp1;
bp1 = bioq_first(&softc->trim_queue);
if (bp1 == NULL ||
bp1->bio_bcount / softc->params.secsize >
(softc->trim_max_ranges - ranges) * 0xffff)
break;
} while (1);
cam_fill_ataio(ataio,
ada_retry_count,
adadone,
CAM_DIR_OUT,
0,
req->data,
((ranges + 63) / 64) * 512,
ada_default_timeout * 1000);
ata_48bit_cmd(ataio, ATA_DATA_SET_MANAGEMENT,
ATA_DSM_TRIM, 0, (ranges + 63) / 64);
start_ccb->ccb_h.ccb_state = ADA_CCB_TRIM;
goto out;
}
/* Run regular command. */
bp = bioq_first(&softc->bio_queue);
if (bp == NULL) {
xpt_release_ccb(start_ccb);
break;
}
bioq_remove(&softc->bio_queue, bp);
if ((softc->flags & ADA_FLAG_NEED_OTAG) != 0) {
softc->flags &= ~ADA_FLAG_NEED_OTAG;
softc->ordered_tag_count++;
tag_code = 0;
} else {
u_int8_t tag_code;
tag_code = 1;
}
switch (bp->bio_cmd) {
case BIO_READ:
case BIO_WRITE:
{
uint64_t lba = bp->bio_pblkno;
uint16_t count = bp->bio_bcount / softc->params.secsize;
bioq_remove(&softc->bio_queue, bp);
cam_fill_ataio(ataio,
ada_retry_count,
adadone,
bp->bio_cmd == BIO_READ ?
CAM_DIR_IN : CAM_DIR_OUT,
tag_code,
bp->bio_data,
bp->bio_bcount,
ada_default_timeout*1000);
if ((softc->flags & ADA_FLAG_NEED_OTAG) != 0) {
softc->flags &= ~ADA_FLAG_NEED_OTAG;
softc->ordered_tag_count++;
tag_code = 0;
} else {
tag_code = 1;
}
switch (bp->bio_cmd) {
case BIO_READ:
case BIO_WRITE:
{
uint64_t lba = bp->bio_pblkno;
uint16_t count = bp->bio_bcount / softc->params.secsize;
cam_fill_ataio(ataio,
ada_retry_count,
adadone,
bp->bio_cmd == BIO_READ ?
CAM_DIR_IN : CAM_DIR_OUT,
tag_code,
bp->bio_data,
bp->bio_bcount,
ada_default_timeout*1000);
if ((softc->flags & ADA_FLAG_CAN_NCQ) && tag_code) {
if ((softc->flags & ADA_FLAG_CAN_NCQ) && tag_code) {
if (bp->bio_cmd == BIO_READ) {
ata_ncq_cmd(ataio, ATA_READ_FPDMA_QUEUED,
lba, count);
} else {
ata_ncq_cmd(ataio, ATA_WRITE_FPDMA_QUEUED,
lba, count);
}
} else if ((softc->flags & ADA_FLAG_CAN_48BIT) &&
(lba + count >= ATA_MAX_28BIT_LBA ||
count > 256)) {
if (softc->flags & ADA_FLAG_CAN_DMA) {
if (bp->bio_cmd == BIO_READ) {
ata_ncq_cmd(ataio, ATA_READ_FPDMA_QUEUED,
lba, count);
ata_48bit_cmd(ataio, ATA_READ_DMA48,
0, lba, count);
} else {
ata_ncq_cmd(ataio, ATA_WRITE_FPDMA_QUEUED,
lba, count);
}
} else if ((softc->flags & ADA_FLAG_CAN_48BIT) &&
(lba + count >= ATA_MAX_28BIT_LBA ||
count > 256)) {
if (softc->flags & ADA_FLAG_CAN_DMA) {
if (bp->bio_cmd == BIO_READ) {
ata_48bit_cmd(ataio, ATA_READ_DMA48,
0, lba, count);
} else {
ata_48bit_cmd(ataio, ATA_WRITE_DMA48,
0, lba, count);
}
} else {
if (bp->bio_cmd == BIO_READ) {
ata_48bit_cmd(ataio, ATA_READ_MUL48,
0, lba, count);
} else {
ata_48bit_cmd(ataio, ATA_WRITE_MUL48,
0, lba, count);
}
ata_48bit_cmd(ataio, ATA_WRITE_DMA48,
0, lba, count);
}
} else {
if (count == 256)
count = 0;
if (softc->flags & ADA_FLAG_CAN_DMA) {
if (bp->bio_cmd == BIO_READ) {
ata_28bit_cmd(ataio, ATA_READ_DMA,
0, lba, count);
} else {
ata_28bit_cmd(ataio, ATA_WRITE_DMA,
0, lba, count);
}
if (bp->bio_cmd == BIO_READ) {
ata_48bit_cmd(ataio, ATA_READ_MUL48,
0, lba, count);
} else {
if (bp->bio_cmd == BIO_READ) {
ata_28bit_cmd(ataio, ATA_READ_MUL,
0, lba, count);
} else {
ata_28bit_cmd(ataio, ATA_WRITE_MUL,
0, lba, count);
}
ata_48bit_cmd(ataio, ATA_WRITE_MUL48,
0, lba, count);
}
}
} else {
if (count == 256)
count = 0;
if (softc->flags & ADA_FLAG_CAN_DMA) {
if (bp->bio_cmd == BIO_READ) {
ata_28bit_cmd(ataio, ATA_READ_DMA,
0, lba, count);
} else {
ata_28bit_cmd(ataio, ATA_WRITE_DMA,
0, lba, count);
}
} else {
if (bp->bio_cmd == BIO_READ) {
ata_28bit_cmd(ataio, ATA_READ_MUL,
0, lba, count);
} else {
ata_28bit_cmd(ataio, ATA_WRITE_MUL,
0, lba, count);
}
}
}
break;
case BIO_FLUSH:
cam_fill_ataio(ataio,
1,
adadone,
CAM_DIR_NONE,
0,
NULL,
0,
ada_default_timeout*1000);
if (softc->flags & ADA_FLAG_CAN_48BIT)
ata_48bit_cmd(ataio, ATA_FLUSHCACHE48, 0, 0, 0);
else
ata_28bit_cmd(ataio, ATA_FLUSHCACHE, 0, 0, 0);
break;
}
start_ccb->ccb_h.ccb_state = ADA_CCB_BUFFER_IO;
start_ccb->ccb_h.ccb_bp = bp;
softc->outstanding_cmds++;
xpt_action(start_ccb);
bp = bioq_first(&softc->bio_queue);
break;
}
case BIO_DELETE:
{
uint64_t lba = bp->bio_pblkno;
uint16_t count = bp->bio_bcount / softc->params.secsize;
if (bp != NULL) {
/* Have more work to do, so ensure we stay scheduled */
xpt_schedule(periph, CAM_PRIORITY_NORMAL);
cam_fill_ataio(ataio,
ada_retry_count,
adadone,
CAM_DIR_NONE,
0,
NULL,
0,
ada_default_timeout*1000);
if (count >= 256)
count = 0;
ata_28bit_cmd(ataio, ATA_CFA_ERASE, 0, lba, count);
break;
}
case BIO_FLUSH:
cam_fill_ataio(ataio,
1,
adadone,
CAM_DIR_NONE,
0,
NULL,
0,
ada_default_timeout*1000);
if (softc->flags & ADA_FLAG_CAN_48BIT)
ata_48bit_cmd(ataio, ATA_FLUSHCACHE48, 0, 0, 0);
else
ata_28bit_cmd(ataio, ATA_FLUSHCACHE, 0, 0, 0);
break;
}
start_ccb->ccb_h.ccb_state = ADA_CCB_BUFFER_IO;
out:
start_ccb->ccb_h.ccb_bp = bp;
softc->outstanding_cmds++;
xpt_action(start_ccb);
/* May have more work to do, so ensure we stay scheduled */
adaschedule(periph);
break;
}
}
@ -882,6 +999,7 @@ adadone(struct cam_periph *periph, union ccb *done_ccb)
ataio = &done_ccb->ataio;
switch (ataio->ccb_h.ccb_state & ADA_CCB_TYPE_MASK) {
case ADA_CCB_BUFFER_IO:
case ADA_CCB_TRIM:
{
struct bio *bp;
@ -908,13 +1026,6 @@ adadone(struct cam_periph *periph, union ccb *done_ccb)
"Invalidating pack\n");
softc->flags |= ADA_FLAG_PACK_INVALID;
}
/*
* return all queued I/O with EIO, so that
* the client can retry these I/Os in the
* proper order should it attempt to recover.
*/
bioq_flush(&softc->bio_queue, NULL, EIO);
bp->bio_error = error;
bp->bio_resid = bp->bio_bcount;
bp->bio_flags |= BIO_ERROR;
@ -940,8 +1051,27 @@ adadone(struct cam_periph *periph, union ccb *done_ccb)
softc->outstanding_cmds--;
if (softc->outstanding_cmds == 0)
softc->flags |= ADA_FLAG_WENT_IDLE;
if ((ataio->ccb_h.ccb_state & ADA_CCB_TYPE_MASK) ==
ADA_CCB_TRIM) {
struct trim_request *req =
(struct trim_request *)ataio->data_ptr;
int i;
biodone(bp);
for (i = 1; i < softc->trim_max_ranges &&
req->bps[i]; i++) {
struct bio *bp1 = req->bps[i];
bp1->bio_resid = bp->bio_resid;
bp1->bio_error = bp->bio_error;
if (bp->bio_flags & BIO_ERROR)
bp1->bio_flags |= BIO_ERROR;
biodone(bp1);
}
softc->trim_running = 0;
biodone(bp);
adaschedule(periph);
} else
biodone(bp);
break;
}
case ADA_CCB_WAITING:

View File

@ -299,8 +299,8 @@ g_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread
}
while (length > 0) {
chunk = length;
if (chunk > 1024 * cp->provider->sectorsize)
chunk = 1024 * cp->provider->sectorsize;
if (chunk > 65536 * cp->provider->sectorsize)
chunk = 65536 * cp->provider->sectorsize;
error = g_delete_data(cp, offset, chunk);
length -= chunk;
offset += chunk;

View File

@ -101,7 +101,9 @@ struct ata_params {
/*066*/ u_int16_t mwdmarec; /* rec. M/W DMA time ns */
/*067*/ u_int16_t pioblind; /* min. PIO cycle w/o flow */
/*068*/ u_int16_t pioiordy; /* min. PIO cycle IORDY flow */
u_int16_t reserved69;
/*069*/ u_int16_t support3;
#define ATA_SUPPORT_RZAT 0x0020
#define ATA_SUPPORT_DRAT 0x4000
u_int16_t reserved70;
/*071*/ u_int16_t rlsovlap; /* rel time (us) for overlap */
/*072*/ u_int16_t rlsservice; /* rel time (us) for service */
@ -204,7 +206,8 @@ struct ata_params {
u_int16_t lba_size48_2;
u_int16_t lba_size48_3;
u_int16_t lba_size48_4;
u_int16_t reserved104[2];
u_int16_t reserved104;
/*105*/ u_int16_t max_dsm_blocks;
/*106*/ u_int16_t pss;
#define ATA_PSS_LSPPS 0x000F
#define ATA_PSS_LSSABOVE512 0x1000
@ -230,7 +233,10 @@ struct ata_params {
/*162*/ u_int16_t cfa_kms_support;
/*163*/ u_int16_t cfa_trueide_modes;
/*164*/ u_int16_t cfa_memory_modes;
u_int16_t reserved165[11];
u_int16_t reserved165[4];
/*169*/ u_int16_t support_dsm;
#define ATA_SUPPORT_DSM_TRIM 0x0001
u_int16_t reserved170[6];
/*176*/ u_int8_t media_serial[60];
/*206*/ u_int16_t sct;
u_int16_t reserved206[2];
@ -284,6 +290,8 @@ struct ata_params {
#define ATA_NOP 0x00 /* NOP */
#define ATA_NF_FLUSHQUEUE 0x00 /* flush queued cmd's */
#define ATA_NF_AUTOPOLL 0x01 /* start autopoll function */
#define ATA_DATA_SET_MANAGEMENT 0x06
#define ATA_DSM_TRIM 0x01
#define ATA_DEVICE_RESET 0x08 /* reset device */
#define ATA_READ 0x20 /* read */
#define ATA_READ48 0x24 /* read 48bit LBA */