/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2022-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/endian.h>
#include <sys/gsb_crc32.h>
#include <sys/queue.h>
#include <sys/uio.h>
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "libnvmf.h"
#include "internal.h"
#include "nvmf_tcp.h"

struct nvmf_tcp_qpair;
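
/*
 * A command buffer tracks an in-progress data transfer for a single
 * command, keyed by CID (and by TTAG as well for controller-side
 * receive transfers).
 */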
struct nvmf_tcp_command_buffer {
	struct nvmf_tcp_qpair *qp;

	void *data;
	size_t data_len;
	size_t data_xfered;
	uint32_t data_offset;

	uint16_t cid;
	uint16_t ttag;

	LIST_ENTRY(nvmf_tcp_command_buffer) link;
};

LIST_HEAD(nvmf_tcp_command_buffer_list, nvmf_tcp_command_buffer);

struct nvmf_tcp_association {
	struct nvmf_association na;

	uint32_t ioccsz;
};

struct nvmf_tcp_rxpdu {
	struct nvme_tcp_common_pdu_hdr *hdr;
	uint32_t data_len;
};

struct nvmf_tcp_capsule {
	struct nvmf_capsule nc;

	struct nvmf_tcp_rxpdu rx_pdu;
	struct nvmf_tcp_command_buffer *cb;

	TAILQ_ENTRY(nvmf_tcp_capsule) link;
};
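
/*
 * Per-queue-pair connection state.  The PDA, digest, MAXR2T, and
 * MAXH2CDATA values are fixed by the ICReq/ICResp exchange when the
 * queue pair is established.
 */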
struct nvmf_tcp_qpair {
	struct nvmf_qpair qp;
	int s;

	uint8_t txpda;
	uint8_t rxpda;
	bool header_digests;
	bool data_digests;
	uint32_t maxr2t;
	uint32_t maxh2cdata;
	uint32_t max_icd;	/* Host only */
	uint16_t next_ttag;	/* Controller only */

	struct nvmf_tcp_command_buffer_list tx_buffers;
	struct nvmf_tcp_command_buffer_list rx_buffers;
	TAILQ_HEAD(, nvmf_tcp_capsule) rx_capsules;
};

#define	TASSOC(na)	((struct nvmf_tcp_association *)(na))
#define	TCAP(nc)	((struct nvmf_tcp_capsule *)(nc))
#define	CTCAP(nc)	((const struct nvmf_tcp_capsule *)(nc))
#define	TQP(qp)		((struct nvmf_tcp_qpair *)(qp))

static const char zero_padding[NVME_TCP_PDU_PDO_MAX_OFFSET];
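
/*
 * The HDGST and DDGST digests in NVMe/TCP are CRC32C values.
 * calculate_crc32c() is seeded with ~0 and the result is inverted,
 * matching the standard CRC-32C presentation.
 */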
static uint32_t
compute_digest(const void *buf, size_t len)
{
	return (calculate_crc32c(0xffffffff, buf, len) ^ 0xffffffff);
}

static struct nvmf_tcp_command_buffer *
tcp_alloc_command_buffer(struct nvmf_tcp_qpair *qp, void *data,
    uint32_t data_offset, size_t data_len, uint16_t cid, uint16_t ttag,
    bool receive)
{
	struct nvmf_tcp_command_buffer *cb;

	cb = malloc(sizeof(*cb));
	cb->qp = qp;
	cb->data = data;
	cb->data_offset = data_offset;
	cb->data_len = data_len;
	cb->data_xfered = 0;
	cb->cid = cid;
	cb->ttag = ttag;

	if (receive)
		LIST_INSERT_HEAD(&qp->rx_buffers, cb, link);
	else
		LIST_INSERT_HEAD(&qp->tx_buffers, cb, link);
	return (cb);
}

static struct nvmf_tcp_command_buffer *
tcp_find_command_buffer(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
    bool receive)
{
	struct nvmf_tcp_command_buffer_list *list;
	struct nvmf_tcp_command_buffer *cb;

	list = receive ? &qp->rx_buffers : &qp->tx_buffers;
	LIST_FOREACH(cb, list, link) {
		if (cb->cid == cid && cb->ttag == ttag)
			return (cb);
	}
	return (NULL);
}

static void
tcp_purge_command_buffer(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
    bool receive)
{
	struct nvmf_tcp_command_buffer *cb;

	cb = tcp_find_command_buffer(qp, cid, ttag, receive);
	if (cb != NULL)
		LIST_REMOVE(cb, link);
}

static void
tcp_free_command_buffer(struct nvmf_tcp_command_buffer *cb)
{
	LIST_REMOVE(cb, link);
	free(cb);
}

static int
nvmf_tcp_write_pdu(struct nvmf_tcp_qpair *qp, const void *pdu, size_t len)
{
	ssize_t nwritten;
	const char *cp;

	cp = pdu;
	while (len != 0) {
		nwritten = write(qp->s, cp, len);
		if (nwritten < 0)
			return (errno);
		len -= nwritten;
		cp += nwritten;
	}
	return (0);
}

static int
nvmf_tcp_write_pdu_iov(struct nvmf_tcp_qpair *qp, struct iovec *iov,
    u_int iovcnt, size_t len)
{
	ssize_t nwritten;

	for (;;) {
		nwritten = writev(qp->s, iov, iovcnt);
		if (nwritten < 0)
			return (errno);

		len -= nwritten;
		if (len == 0)
			return (0);

		while (iov->iov_len <= (size_t)nwritten) {
			nwritten -= iov->iov_len;
			iovcnt--;
			iov++;
		}

		iov->iov_base = (char *)iov->iov_base + nwritten;
		iov->iov_len -= nwritten;
	}
}
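
/*
 * Send a termination request PDU echoing back up to 'hlen' bytes of
 * the offending PDU header as error data, then drop the connection.
 */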
static void
nvmf_tcp_report_error(struct nvmf_association *na, struct nvmf_tcp_qpair *qp,
    uint16_t fes, uint32_t fei, const void *rx_pdu, size_t pdu_len, u_int hlen)
{
	struct nvme_tcp_term_req_hdr hdr;
	struct iovec iov[2];

	if (hlen != 0) {
		if (hlen > NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE)
			hlen = NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE;
		if (hlen > pdu_len)
			hlen = pdu_len;
	}

	memset(&hdr, 0, sizeof(hdr));
	hdr.common.pdu_type = na->na_controller ?
	    NVME_TCP_PDU_TYPE_C2H_TERM_REQ : NVME_TCP_PDU_TYPE_H2C_TERM_REQ;
	hdr.common.hlen = sizeof(hdr);
	hdr.common.plen = htole32(sizeof(hdr) + hlen);
	hdr.fes = htole16(fes);
	le32enc(hdr.fei, fei);
	iov[0].iov_base = &hdr;
	iov[0].iov_len = sizeof(hdr);
	iov[1].iov_base = __DECONST(void *, rx_pdu);
	iov[1].iov_len = hlen;

	(void)nvmf_tcp_write_pdu_iov(qp, iov, nitems(iov), sizeof(hdr) + hlen);
	close(qp->s);
	qp->s = -1;
}
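
/*
 * Validate the common header and digests of a received PDU.  On
 * success, pdu->data_len is set to the length of any data payload.
 * On failure, an error is reported to the peer (or the connection is
 * dropped) and an errno value is returned.
 */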
static int
nvmf_tcp_validate_pdu(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu,
    size_t pdu_len)
{
	const struct nvme_tcp_common_pdu_hdr *ch;
	uint32_t data_len, fei, plen;
	uint32_t digest, rx_digest;
	u_int hlen;
	int error;
	uint16_t fes;

	/* Determine how large of a PDU header to return for errors. */
	ch = pdu->hdr;
	hlen = ch->hlen;
	plen = le32toh(ch->plen);
	if (hlen < sizeof(*ch) || hlen > plen)
		hlen = sizeof(*ch);

	error = nvmf_tcp_validate_pdu_header(ch,
	    qp->qp.nq_association->na_controller, qp->header_digests,
	    qp->data_digests, qp->rxpda, &data_len, &fes, &fei);
	if (error != 0) {
		if (error == ECONNRESET) {
			close(qp->s);
			qp->s = -1;
		} else {
			nvmf_tcp_report_error(qp->qp.nq_association, qp,
			    fes, fei, ch, pdu_len, hlen);
		}
		return (error);
	}

	/* Check header digest if present. */
	if ((ch->flags & NVME_TCP_CH_FLAGS_HDGSTF) != 0) {
		digest = compute_digest(ch, ch->hlen);
		memcpy(&rx_digest, (const char *)ch + ch->hlen,
		    sizeof(rx_digest));
		if (digest != rx_digest) {
			printf("NVMe/TCP: Header digest mismatch\n");
			nvmf_tcp_report_error(qp->qp.nq_association, qp,
			    NVME_TCP_TERM_REQ_FES_HDGST_ERROR, rx_digest, ch,
			    pdu_len, hlen);
			return (EBADMSG);
		}
	}

	/* Check data digest if present. */
	if ((ch->flags & NVME_TCP_CH_FLAGS_DDGSTF) != 0) {
		digest = compute_digest((const char *)ch + ch->pdo, data_len);
		memcpy(&rx_digest, (const char *)ch + plen - sizeof(rx_digest),
		    sizeof(rx_digest));
		if (digest != rx_digest) {
			printf("NVMe/TCP: Data digest mismatch\n");
			return (EBADMSG);
		}
	}

	pdu->data_len = data_len;
	return (0);
}

/*
 * Read data from a socket, retrying until the data has been fully
 * read or an error occurs.
 */
static int
nvmf_tcp_read_buffer(int s, void *buf, size_t len)
{
	ssize_t nread;
	char *cp;

	cp = buf;
	while (len != 0) {
		nread = read(s, cp, len);
		if (nread < 0)
			return (errno);
		if (nread == 0)
			return (ECONNRESET);
		len -= nread;
		cp += nread;
	}
	return (0);
}

static int
nvmf_tcp_read_pdu(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu)
{
	struct nvme_tcp_common_pdu_hdr ch;
	uint32_t plen;
	int error;

	memset(pdu, 0, sizeof(*pdu));
	error = nvmf_tcp_read_buffer(qp->s, &ch, sizeof(ch));
	if (error != 0)
		return (error);

	plen = le32toh(ch.plen);

	/*
	 * Validate a header with garbage lengths to trigger
	 * an error message without reading more.
	 */
	if (plen < sizeof(ch) || ch.hlen > plen) {
		pdu->hdr = &ch;
		error = nvmf_tcp_validate_pdu(qp, pdu, sizeof(ch));
		pdu->hdr = NULL;
		assert(error != 0);
		return (error);
	}

	/* Read the rest of the PDU. */
	pdu->hdr = malloc(plen);
	memcpy(pdu->hdr, &ch, sizeof(ch));
	error = nvmf_tcp_read_buffer(qp->s, pdu->hdr + 1, plen - sizeof(ch));
	if (error != 0) {
		free(pdu->hdr);
		pdu->hdr = NULL;
		return (error);
	}
	error = nvmf_tcp_validate_pdu(qp, pdu, plen);
	if (error != 0) {
		free(pdu->hdr);
		pdu->hdr = NULL;
	}
	return (error);
}

static void
nvmf_tcp_free_pdu(struct nvmf_tcp_rxpdu *pdu)
{
	free(pdu->hdr);
	pdu->hdr = NULL;
}

static int
nvmf_tcp_handle_term_req(struct nvmf_tcp_rxpdu *pdu)
{
	struct nvme_tcp_term_req_hdr *hdr;

	hdr = (void *)pdu->hdr;

	printf("NVMe/TCP: Received termination request: fes %#x fei %#x\n",
	    le16toh(hdr->fes), le32dec(hdr->fei));
	nvmf_tcp_free_pdu(pdu);
	return (ECONNRESET);
}

static int
nvmf_tcp_save_command_capsule(struct nvmf_tcp_qpair *qp,
    struct nvmf_tcp_rxpdu *pdu)
{
	struct nvme_tcp_cmd *cmd;
	struct nvmf_capsule *nc;
	struct nvmf_tcp_capsule *tc;

	cmd = (void *)pdu->hdr;

	nc = nvmf_allocate_command(&qp->qp, &cmd->ccsqe);
	if (nc == NULL)
		return (ENOMEM);

	tc = TCAP(nc);
	tc->rx_pdu = *pdu;

	TAILQ_INSERT_TAIL(&qp->rx_capsules, tc, link);
	return (0);
}

static int
nvmf_tcp_save_response_capsule(struct nvmf_tcp_qpair *qp,
    struct nvmf_tcp_rxpdu *pdu)
{
	struct nvme_tcp_rsp *rsp;
	struct nvmf_capsule *nc;
	struct nvmf_tcp_capsule *tc;

	rsp = (void *)pdu->hdr;

	nc = nvmf_allocate_response(&qp->qp, &rsp->rccqe);
	if (nc == NULL)
		return (ENOMEM);

	nc->nc_sqhd_valid = true;
	tc = TCAP(nc);
	tc->rx_pdu = *pdu;

	TAILQ_INSERT_TAIL(&qp->rx_capsules, tc, link);

	/*
	 * Once the CQE has been received, no further transfers to the
	 * command buffer for the associated CID can occur.
	 */
	tcp_purge_command_buffer(qp, rsp->rccqe.cid, 0, true);
	tcp_purge_command_buffer(qp, rsp->rccqe.cid, 0, false);

	return (0);
}

/*
 * Construct and send a PDU that contains an optional data payload.
 * This includes dealing with digests and the length fields in the
 * common header.
 */
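/*
 * On the wire each transmitted PDU is laid out as:
 *
 *	CH + PSH | HDGST (optional) | PAD | DATA | DDGST (optional)
 *
 * where PAD aligns the start of DATA to the receiver's advertised
 * PDA.
 */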
static int
nvmf_tcp_construct_pdu(struct nvmf_tcp_qpair *qp, void *hdr, size_t hlen,
    void *data, uint32_t data_len)
{
	struct nvme_tcp_common_pdu_hdr *ch;
	struct iovec iov[5];
	u_int iovcnt;
	uint32_t header_digest, data_digest, pad, pdo, plen;

	plen = hlen;
	if (qp->header_digests)
		plen += sizeof(header_digest);
	if (data_len != 0) {
		pdo = roundup2(plen, qp->txpda);
		pad = pdo - plen;
		plen = pdo + data_len;
		if (qp->data_digests)
			plen += sizeof(data_digest);
	} else {
		assert(data == NULL);
		pdo = 0;
		pad = 0;
	}

	ch = hdr;
	ch->hlen = hlen;
	if (qp->header_digests)
		ch->flags |= NVME_TCP_CH_FLAGS_HDGSTF;
	if (qp->data_digests && data_len != 0)
		ch->flags |= NVME_TCP_CH_FLAGS_DDGSTF;
	ch->pdo = pdo;
	ch->plen = htole32(plen);

	/* CH + PSH */
	iov[0].iov_base = hdr;
	iov[0].iov_len = hlen;
	iovcnt = 1;

	/* HDGST */
	if (qp->header_digests) {
		header_digest = compute_digest(hdr, hlen);
		iov[iovcnt].iov_base = &header_digest;
		iov[iovcnt].iov_len = sizeof(header_digest);
		iovcnt++;
	}

	if (pad != 0) {
		/* PAD */
		iov[iovcnt].iov_base = __DECONST(char *, zero_padding);
		iov[iovcnt].iov_len = pad;
		iovcnt++;
	}

	if (data_len != 0) {
		/* DATA */
		iov[iovcnt].iov_base = data;
		iov[iovcnt].iov_len = data_len;
		iovcnt++;

		/* DDGST */
		if (qp->data_digests) {
			data_digest = compute_digest(data, data_len);
			iov[iovcnt].iov_base = &data_digest;
			iov[iovcnt].iov_len = sizeof(data_digest);
			iovcnt++;
		}
	}

	return (nvmf_tcp_write_pdu_iov(qp, iov, iovcnt, plen));
}
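
/*
 * Controller-side handling of an H2C_DATA PDU: locate the receive
 * buffer by (CCCID, TTAG), validate the offset and length against
 * the buffer and the expected transfer order, then copy the payload
 * into place.
 */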
static int
nvmf_tcp_handle_h2c_data(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu)
{
	struct nvme_tcp_h2c_data_hdr *h2c;
	struct nvmf_tcp_command_buffer *cb;
	uint32_t data_len, data_offset;
	const char *icd;

	h2c = (void *)pdu->hdr;
	if (le32toh(h2c->datal) > qp->maxh2cdata) {
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_LIMIT_EXCEEDED, 0,
		    pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	cb = tcp_find_command_buffer(qp, h2c->cccid, h2c->ttag, true);
	if (cb == NULL) {
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
		    offsetof(struct nvme_tcp_h2c_data_hdr, ttag), pdu->hdr,
		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	data_len = le32toh(h2c->datal);
	if (data_len != pdu->data_len) {
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
		    offsetof(struct nvme_tcp_h2c_data_hdr, datal), pdu->hdr,
		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	data_offset = le32toh(h2c->datao);
	if (data_offset < cb->data_offset ||
	    data_offset + data_len > cb->data_offset + cb->data_len) {
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0,
		    pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	if (data_offset != cb->data_offset + cb->data_xfered) {
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr,
		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	if ((cb->data_xfered + data_len == cb->data_len) !=
	    ((pdu->hdr->flags & NVME_TCP_H2C_DATA_FLAGS_LAST_PDU) != 0)) {
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr,
		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	cb->data_xfered += data_len;
	data_offset -= cb->data_offset;
	icd = (const char *)pdu->hdr + pdu->hdr->pdo;
	memcpy((char *)cb->data + data_offset, icd, data_len);

	nvmf_tcp_free_pdu(pdu);
	return (0);
}

static int
nvmf_tcp_handle_c2h_data(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu)
{
	struct nvme_tcp_c2h_data_hdr *c2h;
	struct nvmf_tcp_command_buffer *cb;
	uint32_t data_len, data_offset;
	const char *icd;

	c2h = (void *)pdu->hdr;

	cb = tcp_find_command_buffer(qp, c2h->cccid, 0, true);
	if (cb == NULL) {
		/*
		 * XXX: Could be PDU sequence error if cccid is for a
		 * command that doesn't use a command buffer.
		 */
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
		    offsetof(struct nvme_tcp_c2h_data_hdr, cccid), pdu->hdr,
		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	data_len = le32toh(c2h->datal);
	if (data_len != pdu->data_len) {
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
		    offsetof(struct nvme_tcp_c2h_data_hdr, datal), pdu->hdr,
		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	data_offset = le32toh(c2h->datao);
	if (data_offset < cb->data_offset ||
	    data_offset + data_len > cb->data_offset + cb->data_len) {
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0,
		    pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	if (data_offset != cb->data_offset + cb->data_xfered) {
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr,
		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	if ((cb->data_xfered + data_len == cb->data_len) !=
	    ((pdu->hdr->flags & NVME_TCP_C2H_DATA_FLAGS_LAST_PDU) != 0)) {
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr,
		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	cb->data_xfered += data_len;
	data_offset -= cb->data_offset;
	icd = (const char *)pdu->hdr + pdu->hdr->pdo;
	memcpy((char *)cb->data + data_offset, icd, data_len);

	if ((pdu->hdr->flags & NVME_TCP_C2H_DATA_FLAGS_SUCCESS) != 0) {
		struct nvme_completion cqe;
		struct nvmf_tcp_capsule *tc;
		struct nvmf_capsule *nc;

		memset(&cqe, 0, sizeof(cqe));
		cqe.cid = cb->cid;

		nc = nvmf_allocate_response(&qp->qp, &cqe);
		if (nc == NULL) {
			nvmf_tcp_free_pdu(pdu);
			return (ENOMEM);
		}
		nc->nc_sqhd_valid = false;

		tc = TCAP(nc);
		TAILQ_INSERT_TAIL(&qp->rx_capsules, tc, link);
	}

	nvmf_tcp_free_pdu(pdu);
	return (0);
}

/* NB: cid and ttag are little-endian already. */
static int
tcp_send_h2c_pdu(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
    uint32_t data_offset, void *buf, size_t len, bool last_pdu)
{
	struct nvme_tcp_h2c_data_hdr h2c;

	memset(&h2c, 0, sizeof(h2c));
	h2c.common.pdu_type = NVME_TCP_PDU_TYPE_H2C_DATA;
	if (last_pdu)
		h2c.common.flags |= NVME_TCP_H2C_DATA_FLAGS_LAST_PDU;
	h2c.cccid = cid;
	h2c.ttag = ttag;
	h2c.datao = htole32(data_offset);
	h2c.datal = htole32(len);

	return (nvmf_tcp_construct_pdu(qp, &h2c, sizeof(h2c), buf, len));
}

/* Sends one or more H2C_DATA PDUs, subject to MAXH2CDATA. */
static int
tcp_send_h2c_pdus(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
    uint32_t data_offset, void *buf, size_t len, bool last_pdu)
{
	char *p;

	p = buf;
	while (len != 0) {
		size_t todo;
		int error;

		todo = len;
		if (todo > qp->maxh2cdata)
			todo = qp->maxh2cdata;
		error = tcp_send_h2c_pdu(qp, cid, ttag, data_offset, p, todo,
		    last_pdu && todo == len);
		if (error != 0)
			return (error);
		p += todo;
		len -= todo;
		data_offset += todo;
	}
	return (0);
}
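
/*
 * Host-side handling of an R2T PDU: validate the requested range
 * against the transmit command buffer and stream it back as
 * H2C_DATA PDUs.
 */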
static int
nvmf_tcp_handle_r2t(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu)
{
	struct nvmf_tcp_command_buffer *cb;
	struct nvme_tcp_r2t_hdr *r2t;
	uint32_t data_len, data_offset;
	int error;

	r2t = (void *)pdu->hdr;

	cb = tcp_find_command_buffer(qp, r2t->cccid, 0, false);
	if (cb == NULL) {
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
		    offsetof(struct nvme_tcp_r2t_hdr, cccid), pdu->hdr,
		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	data_offset = le32toh(r2t->r2to);
	if (data_offset != cb->data_xfered) {
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr,
		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	/*
	 * XXX: The spec does not specify how to handle R2T transfers
	 * out of range of the original command.
	 */
	data_len = le32toh(r2t->r2tl);
	if (data_offset + data_len > cb->data_len) {
		nvmf_tcp_report_error(qp->qp.nq_association, qp,
		    NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0,
		    pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	cb->data_xfered += data_len;

	/*
	 * Write out one or more H2C_DATA PDUs containing the
	 * requested data.
	 */
	error = tcp_send_h2c_pdus(qp, r2t->cccid, r2t->ttag,
	    data_offset, (char *)cb->data + data_offset, data_len, true);

	nvmf_tcp_free_pdu(pdu);
	return (error);
}

static int
nvmf_tcp_receive_pdu(struct nvmf_tcp_qpair *qp)
{
	struct nvmf_tcp_rxpdu pdu;
	int error;

	error = nvmf_tcp_read_pdu(qp, &pdu);
	if (error != 0)
		return (error);

	switch (pdu.hdr->pdu_type) {
	default:
		__unreachable();
		break;
	case NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
	case NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
		return (nvmf_tcp_handle_term_req(&pdu));
	case NVME_TCP_PDU_TYPE_CAPSULE_CMD:
		return (nvmf_tcp_save_command_capsule(qp, &pdu));
	case NVME_TCP_PDU_TYPE_CAPSULE_RESP:
		return (nvmf_tcp_save_response_capsule(qp, &pdu));
	case NVME_TCP_PDU_TYPE_H2C_DATA:
		return (nvmf_tcp_handle_h2c_data(qp, &pdu));
	case NVME_TCP_PDU_TYPE_C2H_DATA:
		return (nvmf_tcp_handle_c2h_data(qp, &pdu));
	case NVME_TCP_PDU_TYPE_R2T:
		return (nvmf_tcp_handle_r2t(qp, &pdu));
	}
}
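
/*
 * ICReq and ICResp share the same layout for the PFV, PDA, and DGST
 * fields, so this casts to nvme_tcp_ic_req to validate the common
 * fields of either PDU.
 */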
static bool
nvmf_tcp_validate_ic_pdu(struct nvmf_association *na, struct nvmf_tcp_qpair *qp,
    const struct nvme_tcp_common_pdu_hdr *ch, size_t pdu_len)
{
	const struct nvme_tcp_ic_req *pdu;
	uint32_t plen;
	u_int hlen;

	/* Determine how large of a PDU header to return for errors. */
	hlen = ch->hlen;
	plen = le32toh(ch->plen);
	if (hlen < sizeof(*ch) || hlen > plen)
		hlen = sizeof(*ch);

	/*
	 * Errors must be reported for the lowest incorrect field
	 * first, so validate fields in order.
	 */

	/* Validate pdu_type. */

	/* Controllers only receive PDUs with a PDU direction of 0. */
	if (na->na_controller != ((ch->pdu_type & 0x01) == 0)) {
		na_error(na, "NVMe/TCP: Invalid PDU type %u", ch->pdu_type);
		nvmf_tcp_report_error(na, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 0, ch, pdu_len,
		    hlen);
		return (false);
	}

	switch (ch->pdu_type) {
	case NVME_TCP_PDU_TYPE_IC_REQ:
	case NVME_TCP_PDU_TYPE_IC_RESP:
		break;
	default:
		na_error(na, "NVMe/TCP: Invalid PDU type %u", ch->pdu_type);
		nvmf_tcp_report_error(na, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 0, ch, pdu_len,
		    hlen);
		return (false);
	}

	/* Validate flags. */
	if (ch->flags != 0) {
		na_error(na, "NVMe/TCP: Invalid PDU header flags %#x",
		    ch->flags);
		nvmf_tcp_report_error(na, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 1, ch, pdu_len,
		    hlen);
		return (false);
	}

	/* Validate hlen. */
	if (ch->hlen != 128) {
		na_error(na, "NVMe/TCP: Invalid PDU header length %u",
		    ch->hlen);
		nvmf_tcp_report_error(na, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 2, ch, pdu_len,
		    hlen);
		return (false);
	}

	/* Validate pdo. */
	if (ch->pdo != 0) {
		na_error(na, "NVMe/TCP: Invalid PDU data offset %u", ch->pdo);
		nvmf_tcp_report_error(na, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 3, ch, pdu_len,
		    hlen);
		return (false);
	}

	/* Validate plen. */
	if (plen != 128) {
		na_error(na, "NVMe/TCP: Invalid PDU length %u", plen);
		nvmf_tcp_report_error(na, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 4, ch, pdu_len,
		    hlen);
		return (false);
	}

	/* Validate fields common to both ICReq and ICResp. */
	pdu = (const struct nvme_tcp_ic_req *)ch;
	if (le16toh(pdu->pfv) != 0) {
		na_error(na, "NVMe/TCP: Unsupported PDU version %u",
		    le16toh(pdu->pfv));
		nvmf_tcp_report_error(na, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER,
		    8, ch, pdu_len, hlen);
		return (false);
	}

	if (pdu->hpda > NVME_TCP_HPDA_MAX) {
		na_error(na, "NVMe/TCP: Unsupported PDA %u", pdu->hpda);
		nvmf_tcp_report_error(na, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 10, ch, pdu_len,
		    hlen);
		return (false);
	}

	if (pdu->dgst.bits.reserved != 0) {
		na_error(na, "NVMe/TCP: Invalid digest settings");
		nvmf_tcp_report_error(na, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 11, ch, pdu_len,
		    hlen);
		return (false);
	}

	return (true);
}

static bool
nvmf_tcp_read_ic_req(struct nvmf_association *na, struct nvmf_tcp_qpair *qp,
    struct nvme_tcp_ic_req *pdu)
{
	int error;

	error = nvmf_tcp_read_buffer(qp->s, pdu, sizeof(*pdu));
	if (error != 0) {
		na_error(na, "NVMe/TCP: Failed to read IC request: %s",
		    strerror(error));
		return (false);
	}

	return (nvmf_tcp_validate_ic_pdu(na, qp, &pdu->common, sizeof(*pdu)));
}

static bool
nvmf_tcp_read_ic_resp(struct nvmf_association *na, struct nvmf_tcp_qpair *qp,
    struct nvme_tcp_ic_resp *pdu)
{
	int error;

	error = nvmf_tcp_read_buffer(qp->s, pdu, sizeof(*pdu));
	if (error != 0) {
		na_error(na, "NVMe/TCP: Failed to read IC response: %s",
		    strerror(error));
		return (false);
	}

	return (nvmf_tcp_validate_ic_pdu(na, qp, &pdu->common, sizeof(*pdu)));
}

static struct nvmf_association *
tcp_allocate_association(bool controller,
    const struct nvmf_association_params *params)
{
	struct nvmf_tcp_association *ta;

	if (controller) {
		/* 7.4.10.3 */
		if (params->tcp.maxh2cdata < 4096 ||
		    params->tcp.maxh2cdata % 4 != 0)
			return (NULL);
	}

	ta = calloc(1, sizeof(*ta));

	return (&ta->na);
}

static void
tcp_update_association(struct nvmf_association *na,
    const struct nvme_controller_data *cdata)
{
	struct nvmf_tcp_association *ta = TASSOC(na);

	ta->ioccsz = le32toh(cdata->ioccsz);
}

static void
tcp_free_association(struct nvmf_association *na)
{
	free(na);
}

static bool
tcp_connect(struct nvmf_tcp_qpair *qp, struct nvmf_association *na, bool admin)
{
	const struct nvmf_association_params *params = &na->na_params;
	struct nvmf_tcp_association *ta = TASSOC(na);
	struct nvme_tcp_ic_req ic_req;
	struct nvme_tcp_ic_resp ic_resp;
	uint32_t maxh2cdata;
	int error;

	if (!admin) {
		if (ta->ioccsz == 0) {
			na_error(na, "TCP I/O queues require cdata");
			return (false);
		}
		if (ta->ioccsz < 4) {
			na_error(na, "Invalid IOCCSZ %u", ta->ioccsz);
			return (false);
		}
	}

	memset(&ic_req, 0, sizeof(ic_req));
	ic_req.common.pdu_type = NVME_TCP_PDU_TYPE_IC_REQ;
	ic_req.common.hlen = sizeof(ic_req);
	ic_req.common.plen = htole32(sizeof(ic_req));
	ic_req.pfv = htole16(0);
	ic_req.hpda = params->tcp.pda;
	if (params->tcp.header_digests)
		ic_req.dgst.bits.hdgst_enable = 1;
	if (params->tcp.data_digests)
		ic_req.dgst.bits.ddgst_enable = 1;
	ic_req.maxr2t = htole32(params->tcp.maxr2t);

	error = nvmf_tcp_write_pdu(qp, &ic_req, sizeof(ic_req));
	if (error != 0) {
		na_error(na, "Failed to write IC request: %s", strerror(error));
		return (false);
	}

	if (!nvmf_tcp_read_ic_resp(na, qp, &ic_resp))
		return (false);

	/* Ensure the controller didn't enable digests we didn't request. */
	if ((!params->tcp.header_digests &&
	    ic_resp.dgst.bits.hdgst_enable != 0) ||
	    (!params->tcp.data_digests &&
	    ic_resp.dgst.bits.ddgst_enable != 0)) {
		na_error(na, "Controller enabled unrequested digests");
		nvmf_tcp_report_error(na, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER,
		    11, &ic_resp, sizeof(ic_resp), sizeof(ic_resp));
		return (false);
	}

	/*
	 * XXX: Is there an upper-bound to enforce here?  Perhaps pick
	 * some large value and report larger values as an unsupported
	 * parameter?
	 */
	maxh2cdata = le32toh(ic_resp.maxh2cdata);
	if (maxh2cdata < 4096 || maxh2cdata % 4 != 0) {
		na_error(na, "Invalid MAXH2CDATA %u", maxh2cdata);
		nvmf_tcp_report_error(na, qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 12, &ic_resp,
		    sizeof(ic_resp), sizeof(ic_resp));
		return (false);
	}

	qp->txpda = (params->tcp.pda + 1) * 4;
	qp->rxpda = (ic_resp.cpda + 1) * 4;
	qp->header_digests = ic_resp.dgst.bits.hdgst_enable != 0;
	qp->data_digests = ic_resp.dgst.bits.ddgst_enable != 0;
	qp->maxr2t = params->tcp.maxr2t;
	qp->maxh2cdata = maxh2cdata;
	if (admin)
		/* 7.4.3 */
		qp->max_icd = 8192;
	else
		qp->max_icd = (ta->ioccsz - 4) * 16;

	return (true);
}

static bool
tcp_accept(struct nvmf_tcp_qpair *qp, struct nvmf_association *na)
{
	const struct nvmf_association_params *params = &na->na_params;
	struct nvme_tcp_ic_req ic_req;
	struct nvme_tcp_ic_resp ic_resp;
	int error;

	if (!nvmf_tcp_read_ic_req(na, qp, &ic_req))
		return (false);

	memset(&ic_resp, 0, sizeof(ic_resp));
	ic_resp.common.pdu_type = NVME_TCP_PDU_TYPE_IC_RESP;
	ic_resp.common.hlen = sizeof(ic_resp);
	ic_resp.common.plen = htole32(sizeof(ic_resp));
	ic_resp.pfv = htole16(0);
	ic_resp.cpda = params->tcp.pda;
	if (params->tcp.header_digests && ic_req.dgst.bits.hdgst_enable != 0)
		ic_resp.dgst.bits.hdgst_enable = 1;
	if (params->tcp.data_digests && ic_req.dgst.bits.ddgst_enable != 0)
		ic_resp.dgst.bits.ddgst_enable = 1;
	ic_resp.maxh2cdata = htole32(params->tcp.maxh2cdata);

	error = nvmf_tcp_write_pdu(qp, &ic_resp, sizeof(ic_resp));
	if (error != 0) {
		na_error(na, "Failed to write IC response: %s",
		    strerror(error));
		return (false);
	}

	qp->txpda = (params->tcp.pda + 1) * 4;
	qp->rxpda = (ic_req.hpda + 1) * 4;
	qp->header_digests = ic_resp.dgst.bits.hdgst_enable != 0;
	qp->data_digests = ic_resp.dgst.bits.ddgst_enable != 0;
	qp->maxr2t = le32toh(ic_req.maxr2t);
	qp->maxh2cdata = params->tcp.maxh2cdata;
	qp->max_icd = 0;	/* XXX */
	return (true);
}

static struct nvmf_qpair *
tcp_allocate_qpair(struct nvmf_association *na,
    const struct nvmf_qpair_params *qparams)
{
	const struct nvmf_association_params *aparams = &na->na_params;
	struct nvmf_tcp_qpair *qp;
	bool ok;

	if (aparams->tcp.pda > NVME_TCP_CPDA_MAX) {
		na_error(na, "Invalid PDA");
		return (NULL);
	}

	qp = calloc(1, sizeof(*qp));
	qp->s = qparams->tcp.fd;
	LIST_INIT(&qp->rx_buffers);
	LIST_INIT(&qp->tx_buffers);
	TAILQ_INIT(&qp->rx_capsules);
	if (na->na_controller)
		ok = tcp_accept(qp, na);
	else
		ok = tcp_connect(qp, na, qparams->admin);
	if (!ok) {
		free(qp);
		return (NULL);
	}

	return (&qp->qp);
}

static void
tcp_free_qpair(struct nvmf_qpair *nq)
{
	struct nvmf_tcp_qpair *qp = TQP(nq);
	struct nvmf_tcp_capsule *ntc, *tc;
	struct nvmf_tcp_command_buffer *ncb, *cb;

	TAILQ_FOREACH_SAFE(tc, &qp->rx_capsules, link, ntc) {
		TAILQ_REMOVE(&qp->rx_capsules, tc, link);
		nvmf_free_capsule(&tc->nc);
	}
	LIST_FOREACH_SAFE(cb, &qp->rx_buffers, link, ncb) {
		tcp_free_command_buffer(cb);
	}
	LIST_FOREACH_SAFE(cb, &qp->tx_buffers, link, ncb) {
		tcp_free_command_buffer(cb);
	}
	free(qp);
}

static int
tcp_kernel_handoff_params(struct nvmf_qpair *nq,
    struct nvmf_handoff_qpair_params *qparams)
{
	struct nvmf_tcp_qpair *qp = TQP(nq);

	qparams->tcp.fd = qp->s;
	qparams->tcp.rxpda = qp->rxpda;
	qparams->tcp.txpda = qp->txpda;
	qparams->tcp.header_digests = qp->header_digests;
	qparams->tcp.data_digests = qp->data_digests;
	qparams->tcp.maxr2t = qp->maxr2t;
	qparams->tcp.maxh2cdata = qp->maxh2cdata;
	qparams->tcp.max_icd = qp->max_icd;

	return (0);
}

static struct nvmf_capsule *
tcp_allocate_capsule(struct nvmf_qpair *qp __unused)
{
	struct nvmf_tcp_capsule *nc;

	nc = calloc(1, sizeof(*nc));
	return (&nc->nc);
}

static void
tcp_free_capsule(struct nvmf_capsule *nc)
{
	struct nvmf_tcp_capsule *tc = TCAP(nc);

	nvmf_tcp_free_pdu(&tc->rx_pdu);
	if (tc->cb != NULL)
		tcp_free_command_buffer(tc->cb);
	free(tc);
}

static int
tcp_transmit_command(struct nvmf_capsule *nc)
{
	struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair);
	struct nvmf_tcp_capsule *tc = TCAP(nc);
	struct nvme_tcp_cmd cmd;
	struct nvme_sgl_descriptor *sgl;
	int error;
	bool use_icd;

	use_icd = false;
	if (nc->nc_data_len != 0 && nc->nc_send_data &&
	    nc->nc_data_len <= qp->max_icd)
		use_icd = true;

	memset(&cmd, 0, sizeof(cmd));
	cmd.common.pdu_type = NVME_TCP_PDU_TYPE_CAPSULE_CMD;
	cmd.ccsqe = nc->nc_sqe;

	/* Populate SGL in SQE. */
	sgl = &cmd.ccsqe.sgl;
	memset(sgl, 0, sizeof(*sgl));
	sgl->address = 0;
	sgl->length = htole32(nc->nc_data_len);
	if (use_icd) {
		/* Use in-capsule data. */
		sgl->type = NVME_SGL_TYPE_ICD;
	} else {
		/* Use a command buffer. */
		sgl->type = NVME_SGL_TYPE_COMMAND_BUFFER;
	}

	/* Send command capsule. */
	error = nvmf_tcp_construct_pdu(qp, &cmd, sizeof(cmd), use_icd ?
	    nc->nc_data : NULL, use_icd ? nc->nc_data_len : 0);
	if (error != 0)
		return (error);

	/*
	 * If data will be transferred using a command buffer, allocate a
	 * buffer structure and queue it.
	 */
	if (nc->nc_data_len != 0 && !use_icd)
		tc->cb = tcp_alloc_command_buffer(qp, nc->nc_data, 0,
		    nc->nc_data_len, cmd.ccsqe.cid, 0, !nc->nc_send_data);

	return (0);
}

static int
tcp_transmit_response(struct nvmf_capsule *nc)
{
	struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair);
	struct nvme_tcp_rsp rsp;

	memset(&rsp, 0, sizeof(rsp));
	rsp.common.pdu_type = NVME_TCP_PDU_TYPE_CAPSULE_RESP;
	rsp.rccqe = nc->nc_cqe;

	return (nvmf_tcp_construct_pdu(qp, &rsp, sizeof(rsp), NULL, 0));
}

static int
tcp_transmit_capsule(struct nvmf_capsule *nc)
{
	if (nc->nc_qe_len == sizeof(struct nvme_command))
		return (tcp_transmit_command(nc));
	else
		return (tcp_transmit_response(nc));
}

static int
tcp_receive_capsule(struct nvmf_qpair *nq, struct nvmf_capsule **ncp)
{
	struct nvmf_tcp_qpair *qp = TQP(nq);
	struct nvmf_tcp_capsule *tc;
	int error;

	while (TAILQ_EMPTY(&qp->rx_capsules)) {
		error = nvmf_tcp_receive_pdu(qp);
		if (error != 0)
			return (error);
	}
	tc = TAILQ_FIRST(&qp->rx_capsules);
	TAILQ_REMOVE(&qp->rx_capsules, tc, link);
	*ncp = &tc->nc;
	return (0);
}

static uint8_t
tcp_validate_command_capsule(const struct nvmf_capsule *nc)
{
	const struct nvmf_tcp_capsule *tc = CTCAP(nc);
	const struct nvme_sgl_descriptor *sgl;

	assert(tc->rx_pdu.hdr != NULL);

	sgl = &nc->nc_sqe.sgl;
	switch (sgl->type) {
	case NVME_SGL_TYPE_ICD:
		if (tc->rx_pdu.data_len != le32toh(sgl->length)) {
			printf("NVMe/TCP: Command Capsule with mismatched ICD length\n");
			return (NVME_SC_DATA_SGL_LENGTH_INVALID);
		}
		break;
	case NVME_SGL_TYPE_COMMAND_BUFFER:
		if (tc->rx_pdu.data_len != 0) {
			printf("NVMe/TCP: Command Buffer SGL with ICD\n");
			return (NVME_SC_INVALID_FIELD);
		}
		break;
	default:
		printf("NVMe/TCP: Invalid SGL type in Command Capsule\n");
		return (NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID);
	}

	if (sgl->address != 0) {
		printf("NVMe/TCP: Invalid SGL offset in Command Capsule\n");
		return (NVME_SC_SGL_OFFSET_INVALID);
	}

	return (NVME_SC_SUCCESS);
}

static size_t
tcp_capsule_data_len(const struct nvmf_capsule *nc)
{
	assert(nc->nc_qe_len == sizeof(struct nvme_command));
	return (le32toh(nc->nc_sqe.sgl.length));
}

/* NB: cid and ttag are both little-endian already. */
static int
tcp_send_r2t(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
    uint32_t data_offset, uint32_t data_len)
{
	struct nvme_tcp_r2t_hdr r2t;

	memset(&r2t, 0, sizeof(r2t));
	r2t.common.pdu_type = NVME_TCP_PDU_TYPE_R2T;
	r2t.cccid = cid;
	r2t.ttag = ttag;
	r2t.r2to = htole32(data_offset);
	r2t.r2tl = htole32(data_len);

	return (nvmf_tcp_construct_pdu(qp, &r2t, sizeof(r2t), NULL, 0));
}
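
/*
 * Controller-side receive of a command buffer: issue an R2T for the
 * requested range, then parse incoming PDUs until the matching
 * H2C_DATA transfers have filled the buffer.
 */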
static int
tcp_receive_r2t_data(const struct nvmf_capsule *nc, uint32_t data_offset,
    void *buf, size_t len)
{
	struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair);
	struct nvmf_tcp_command_buffer *cb;
	int error;
	uint16_t ttag;

	/*
	 * Don't bother byte-swapping ttag as it is just a cookie
	 * value returned by the other end as-is.
	 */
	ttag = qp->next_ttag++;

	error = tcp_send_r2t(qp, nc->nc_sqe.cid, ttag, data_offset, len);
	if (error != 0)
		return (error);

	cb = tcp_alloc_command_buffer(qp, buf, data_offset, len,
	    nc->nc_sqe.cid, ttag, true);

	/* Parse received PDUs until the data transfer is complete. */
	while (cb->data_xfered < cb->data_len) {
		error = nvmf_tcp_receive_pdu(qp);
		if (error != 0)
			break;
	}
	tcp_free_command_buffer(cb);
	return (error);
}

static int
tcp_receive_icd_data(const struct nvmf_capsule *nc, uint32_t data_offset,
    void *buf, size_t len)
{
	const struct nvmf_tcp_capsule *tc = CTCAP(nc);
	const char *icd;

	icd = (const char *)tc->rx_pdu.hdr + tc->rx_pdu.hdr->pdo + data_offset;
	memcpy(buf, icd, len);
	return (0);
}

static int
tcp_receive_controller_data(const struct nvmf_capsule *nc, uint32_t data_offset,
    void *buf, size_t len)
{
	struct nvmf_association *na = nc->nc_qpair->nq_association;
	const struct nvme_sgl_descriptor *sgl;
	size_t data_len;

	if (nc->nc_qe_len != sizeof(struct nvme_command) || !na->na_controller)
		return (EINVAL);

	sgl = &nc->nc_sqe.sgl;
	data_len = le32toh(sgl->length);
	if (data_offset + len > data_len)
		return (EFBIG);

	if (sgl->type == NVME_SGL_TYPE_ICD)
		return (tcp_receive_icd_data(nc, data_offset, buf, len));
	else
		return (tcp_receive_r2t_data(nc, data_offset, buf, len));
}

/* NB: cid is little-endian already. */
static int
tcp_send_c2h_pdu(struct nvmf_tcp_qpair *qp, uint16_t cid,
    uint32_t data_offset, const void *buf, size_t len, bool last_pdu,
    bool success)
{
	struct nvme_tcp_c2h_data_hdr c2h;

	memset(&c2h, 0, sizeof(c2h));
	c2h.common.pdu_type = NVME_TCP_PDU_TYPE_C2H_DATA;
	if (last_pdu)
		c2h.common.flags |= NVME_TCP_C2H_DATA_FLAGS_LAST_PDU;
	if (success)
		c2h.common.flags |= NVME_TCP_C2H_DATA_FLAGS_SUCCESS;
	c2h.cccid = cid;
	c2h.datao = htole32(data_offset);
	c2h.datal = htole32(len);

	return (nvmf_tcp_construct_pdu(qp, &c2h, sizeof(c2h),
	    __DECONST(void *, buf), len));
}

static int
tcp_send_controller_data(const struct nvmf_capsule *nc, const void *buf,
    size_t len)
{
	struct nvmf_association *na = nc->nc_qpair->nq_association;
	struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair);
	const struct nvme_sgl_descriptor *sgl;
	const char *src;
	size_t todo;
	uint32_t data_len, data_offset;
	int error;
	bool last_pdu, send_success_flag;

	if (nc->nc_qe_len != sizeof(struct nvme_command) || !na->na_controller)
		return (EINVAL);

	sgl = &nc->nc_sqe.sgl;
	data_len = le32toh(sgl->length);
	if (len != data_len) {
		nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
		return (EFBIG);
	}

	if (sgl->type != NVME_SGL_TYPE_COMMAND_BUFFER) {
		nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
		return (EINVAL);
	}

	/* Use the SUCCESS flag if SQ flow control is disabled. */
	send_success_flag = !qp->qp.nq_flow_control;

	/*
	 * Write out one or more C2H_DATA PDUs containing the data.
	 * Each PDU is arbitrarily capped at 256k.
	 */
	data_offset = 0;
	src = buf;
	while (len > 0) {
		if (len > 256 * 1024) {
			todo = 256 * 1024;
			last_pdu = false;
		} else {
			todo = len;
			last_pdu = true;
		}
		error = tcp_send_c2h_pdu(qp, nc->nc_sqe.cid, data_offset,
		    src, todo, last_pdu, last_pdu && send_success_flag);
		if (error != 0) {
			nvmf_send_generic_error(nc,
			    NVME_SC_TRANSIENT_TRANSPORT_ERROR);
			return (error);
		}
		data_offset += todo;
		src += todo;
		len -= todo;
	}
	if (!send_success_flag)
		nvmf_send_success(nc);
	return (0);
}
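
/* Transport method table for NVMe over TCP used by the generic libnvmf code. */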
struct nvmf_transport_ops tcp_ops = {
	.allocate_association = tcp_allocate_association,
	.update_association = tcp_update_association,
	.free_association = tcp_free_association,
	.allocate_qpair = tcp_allocate_qpair,
	.free_qpair = tcp_free_qpair,
	.kernel_handoff_params = tcp_kernel_handoff_params,
	.allocate_capsule = tcp_allocate_capsule,
	.free_capsule = tcp_free_capsule,
	.transmit_capsule = tcp_transmit_capsule,
	.receive_capsule = tcp_receive_capsule,
	.validate_command_capsule = tcp_validate_command_capsule,
	.capsule_data_len = tcp_capsule_data_len,
	.receive_controller_data = tcp_receive_controller_data,
	.send_controller_data = tcp_send_controller_data,
};