[PATCH 3/4] nvme-fabrics: add tp8010 support

Sagi Grimberg sagi at grimberg.me
Thu Jan 27 05:12:05 PST 2022



On 1/27/22 11:30, Hannes Reinecke wrote:
> On 1/25/22 15:59, Martin Belanger wrote:
>> From: Martin Belanger <martin.belanger at dell.com>
>>
>> TP8010 introduces Central Discovery Controllers (CDC) and the
>> Discovery Information Management (DIM) PDU, which is used to
>> perform explicit registration with Discovery Controllers.
>>
>> This patch defines the DIM PDU and adds logic to perform explicit
>> registration with DCs when registration has been requested by the
>> user.
>>
>> Signed-off-by: Martin Belanger <martin.belanger at dell.com>
>> ---
>>   drivers/nvme/host/fabrics.c | 164 +++++++++++++++++++++++
>>   drivers/nvme/host/fabrics.h |   9 ++
>>   drivers/nvme/host/tcp.c     |  51 +++++++-
>>   include/linux/nvme.h        | 255 ++++++++++++++++++++++++++++++++++--
>>   4 files changed, 467 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
>> index 040a6cce6afa..e6fc2f85b670 100644
>> --- a/drivers/nvme/host/fabrics.c
>> +++ b/drivers/nvme/host/fabrics.c
>> @@ -10,6 +10,7 @@
>>   #include <linux/mutex.h>
>>   #include <linux/parser.h>
>>   #include <linux/seq_file.h>
>> +#include <linux/utsname.h>
>>   #include "nvme.h"
>>   #include "fabrics.h"
>> @@ -526,6 +527,165 @@ static struct nvmf_transport_ops 
>> *nvmf_lookup_transport(
>>       return NULL;
>>   }
>> +/**
>> + * nvmf_get_tel() - Calculate the amount of memory needed for a 
>> Discovery
>> + * Information Entry. Each Entry must contain at a minimum an Extended
>> + * Attribute for the HostID. The Entry may optionally contain an 
>> Extended
>> + * Attribute for the Symbolic Name.
>> + *
>> + * @hostsymname - Symbolic name (may be NULL)
>> + *
>> + * Return Total Entry Length
>> + */
>> +static u32 nvmf_get_tel(const char *hostsymname)
>> +{
>> +    u32 tel = SIZEOF_EXT_DIE_WO_EXAT;
>> +    u16 len;
>> +
>> +    /* Host ID is mandatory */
>> +    tel += exat_size(sizeof(uuid_t));
>> +
>> +    /* Symbolic name is optional */
>> +    len = hostsymname ? strlen(hostsymname) : 0;
>> +    if (len)
>> +        tel += exat_size(len);
>> +
>> +    return tel;
>> +}
>> +
>> +/**
>> + * nvmf_fill_die() - Fill the Discovery Information Entry pointed to by
>> + *                   @die.
>> + *
>> + * @die - Pointer to Discovery Information Entry to be filled
>> + * @tel - Length of the DIE
>> + * @trtype - Transport type
>> + * @adrfam - Address family
>> + * @reg_addr - Address to register. Setting this to an empty string 
>> tells
>> + *       the DC to infer address from the source address of the socket.
>> + * @nqn - Host NQN
>> + * @hostid - Host ID
>> + * @hostsymname - Host symbolic name
>> + * @tsas - Transport Specific Address Subtype for the address being
>> + *       registered.
>> + */
>> +static void nvmf_fill_die(struct nvmf_ext_die  *die,
>> +              u32                   tel,
>> +              u8                    trtype,
>> +              u8                    adrfam,
>> +              const char           *reg_addr,
>> +              struct nvmf_host     *host,
>> +              union nvmf_tsas      *tsas)
>> +{
>> +    u16 numexat = 0;
>> +    size_t symname_len;
>> +    struct nvmf_ext_attr *exat;
>> +
>> +    die->tel = cpu_to_le32(tel);
>> +    die->trtype = trtype;
>> +    die->adrfam = adrfam;
>> +
>> +    strncpy(die->nqn, host->nqn, sizeof(die->nqn));
>> +    strncpy(die->traddr, reg_addr, sizeof(die->traddr));
>> +    memcpy(&die->tsas, tsas, sizeof(die->tsas));
>> +
>> +    /* Extended Attribute for the HostID (mandatory) */
>> +    numexat++;
>> +    exat = &die->exat;
>> +    exat->type = cpu_to_le16(NVME_EXATTYPE_HOSTID);
>> +    exat->len  = cpu_to_le16(exat_len(sizeof(host->id)));
>> +    uuid_copy((uuid_t *)exat->val, &host->id);
>> +
>> +    /* Extended Attribute for the Symbolic Name (optional) */
>> +    symname_len = strlen(host->symname);
>> +    if (symname_len) {
>> +        u16 exatlen = exat_len(symname_len);
>> +
>> +        numexat++;
>> +        exat = exat_ptr_next(exat);
>> +        exat->type = cpu_to_le16(NVME_EXATTYPE_SYMNAME);
>> +        exat->len  = cpu_to_le16(exatlen);
>> +        memcpy(exat->val, host->symname, symname_len);
>> +        /* Per Base specs, ASCII strings must be padded with spaces */
>> +        memset(&exat->val[symname_len], ' ', exatlen - symname_len);
>> +    }
>> +
>> +    die->numexat = cpu_to_le16(numexat);
>> +}
>> +
>> +/**
>> + * nvmf_disc_info_mgmt() - Perform explicit registration, 
>> deregistration,
>> + * or registration-update (specified by @tas) by sending a Discovery
>> + * Information Management (DIM) command to the Discovery Controller 
>> (DC).
>> + *
>> + * @ctrl: Host NVMe controller instance maintaining the admin queue 
>> used to
>> + *      submit the DIM command to the DC.
>> + * @tas: Task field of the Command Dword 10 (cdw10). Indicates 
>> whether to
>> + *     perform a Registration, Deregistration, or Registration-update.
>> + * @trtype: Transport type (e.g. NVMF_TRTYPE_TCP)
>> + * @adrfam: Address family (e.g. NVMF_ADDR_FAMILY_IP6)
>> + * @reg_addr - Address to register. Setting this to an empty string 
>> tells
>> + *       the DC to infer address from the source address of the socket.
>> + * @tsas - Transport Specific Address Subtype for the address being
>> + *       registered.
>> + *
>> + * Return:   0: success,
>> + *         > 0: NVMe error status code,
>> + *         < 0: Linux errno error code
>> + */
>> +int nvmf_disc_info_mgmt(struct nvme_ctrl *ctrl,
>> +            enum nvmf_dim_tas tas,
>> +            u8                trtype,
>> +            u8                adrfam,
>> +            const char       *reg_addr,
>> +            union nvmf_tsas  *tsas)
>> +{
>> +    int                   ret;
>> +    u32                   tdl;  /* Total Data Length */
>> +    u32                   tel;  /* Total Entry Length */
>> +    struct nvmf_dim_data *dim;  /* Discovery Information Management */
>> +    struct nvmf_ext_die  *die;  /* Discovery Information Entry */
>> +    struct nvme_command   cmd = {};
>> +    union nvme_result     result;
>> +
>> +    /* Register 1 Discovery Information Entry (DIE) of size TEL */
>> +    tel = nvmf_get_tel(ctrl->opts->host->symname);
>> +    tdl = SIZEOF_DIM_WO_DIE + tel;
>> +
>> +    dim = kzalloc(tdl, GFP_KERNEL);
>> +    if (!dim)
>> +        return -ENOMEM;
>> +
>> +    cmd.dim.opcode = nvme_admin_dim;
>> +    cmd.dim.cdw10  = cpu_to_le32(tas);
>> +
>> +    dim->tdl    = cpu_to_le32(tdl);
>> +    dim->nument = cpu_to_le64(1);    /* only 1 DIE to register */
>> +    dim->entfmt = cpu_to_le16(NVME_ENTFMT_EXTENDED);
>> +    dim->etype  = cpu_to_le16(NVME_REGISTRATION_HOST);
>> +    dim->ektype = cpu_to_le16(0x5F); /* must be 0x5F per specs */
>> +
>> +    strncpy(dim->eid, ctrl->opts->host->nqn, sizeof(dim->eid));
>> +    strncpy(dim->ename, utsname()->nodename, sizeof(dim->ename));
>> +    strncpy(dim->ever, utsname()->release, sizeof(dim->ever));
>> +
>> +    die = &dim->die.extended;
>> +    nvmf_fill_die(die, tel, trtype, adrfam, reg_addr,
>> +              ctrl->opts->host, tsas);
>> +
>> +    ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &result,
>> +                     dim, tdl, 0, NVME_QID_ANY, 1, 0);
>> +    if (unlikely(ret)) {
>> +        dev_err(ctrl->device, "DIM error Result:0x%04X Status:0x%04X\n",
>> +           le32_to_cpu(result.u32), ret);
>> +    }
>> +
>> +    kfree(dim);
>> +
>> +    return ret;
>> +}
>> +EXPORT_SYMBOL_GPL(nvmf_disc_info_mgmt);
>> +
>>   static const match_table_t opt_tokens = {
>>       { NVMF_OPT_TRANSPORT,        "transport=%s"        },
>>       { NVMF_OPT_TRADDR,        "traddr=%s"        },
>> @@ -550,6 +710,7 @@ static const match_table_t opt_tokens = {
>>       { NVMF_OPT_TOS,            "tos=%d"        },
>>       { NVMF_OPT_FAIL_FAST_TMO,    "fast_io_fail_tmo=%d"    },
>>       { NVMF_OPT_DISCOVERY,        "discovery"        },
>> +    { NVMF_OPT_REGISTER,        "register"        },
>>       { NVMF_OPT_ERR,            NULL            }
>>   };
>> @@ -831,6 +992,9 @@ static int nvmf_parse_options(struct 
>> nvmf_ctrl_options *opts,
>>           case NVMF_OPT_DISCOVERY:
>>               opts->discovery_nqn = true;
>>               break;
>> +        case NVMF_OPT_REGISTER:
>> +            opts->explicit_registration = true;
>> +            break;
>>           case NVMF_OPT_SYMNAME:
>>               hostsymname = match_strdup(args);
>>               if (!hostsymname) {
>> diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
>> index 494e6dbe233a..77f5768f8ff4 100644
>> --- a/drivers/nvme/host/fabrics.h
>> +++ b/drivers/nvme/host/fabrics.h
>> @@ -70,6 +70,7 @@ enum {
>>       NVMF_OPT_HOST_IFACE    = 1 << 21,
>>       NVMF_OPT_DISCOVERY    = 1 << 22,
>>       NVMF_OPT_SYMNAME    = 1 << 23,
>> +    NVMF_OPT_REGISTER    = 1 << 24,
>>   };
>>   /**
>> @@ -106,6 +107,7 @@ enum {
>>    * @nr_poll_queues: number of queues for polling I/O
>>    * @tos: type of service
>>    * @fast_io_fail_tmo: Fast I/O fail timeout in seconds
>> + * @explicit_registration: Explicit Registration with DC using the 
>> DIM PDU
>>    */
>>   struct nvmf_ctrl_options {
>>       unsigned        mask;
>> @@ -130,6 +132,7 @@ struct nvmf_ctrl_options {
>>       unsigned int        nr_poll_queues;
>>       int            tos;
>>       int            fast_io_fail_tmo;
>> +    bool            explicit_registration;
>>   };
>>   /*
>> @@ -200,5 +203,11 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char 
>> *buf, int size);
>>   bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
>>   bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
>>           struct nvmf_ctrl_options *opts);
>> +int nvmf_disc_info_mgmt(struct nvme_ctrl *ctrl,
>> +            enum nvmf_dim_tas tas,
>> +            u8                trtype,
>> +            u8                adrfam,
>> +            const char       *reg_addr,
>> +            union nvmf_tsas  *tsas);
>>   #endif /* _NVME_FABRICS_H */
>> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
>> index 4ceb28675fdf..970b7df574a4 100644
>> --- a/drivers/nvme/host/tcp.c
>> +++ b/drivers/nvme/host/tcp.c
>> @@ -1993,6 +1993,40 @@ static void nvme_tcp_reconnect_or_remove(struct 
>> nvme_ctrl *ctrl)
>>       }
>>   }
>> +/**
>> + * nvme_get_adrfam() - Get address family for the address we're 
>> registering
>> + * with the DC. We retrieve this info from the socket itself. If we 
>> can't
>> + * get the source address from the socket, then we'll infer the address
>> + * family from the address of the DC since the DC address has the same
>> + * address family.
>> + *
>> + * @ctrl: Host NVMe controller instance maintaining the admin queue 
>> used to
>> + *      submit the DIM command to the DC.
>> + *
>> + * Return: The address family of the source address associated with the
>> + * socket connected to the DC.
>> + */
>> +static u8 nvme_get_adrfam(struct nvme_ctrl *ctrl)
>> +{
>> +    u8                       adrfam = NVMF_ADDR_FAMILY_IP4;
>> +    struct sockaddr_storage  sas;
>> +    struct sockaddr         *sa = (struct sockaddr *)&sas;
>> +    struct nvme_tcp_queue   *queue = &to_tcp_ctrl(ctrl)->queues[0];
>> +
>> +    if (kernel_getsockname(queue->sock, sa) == 0) {
>> +        if (sa->sa_family == AF_INET6)
>> +            adrfam = NVMF_ADDR_FAMILY_IP6;
>> +    } else {
>> +        unsigned char buf[sizeof(struct in6_addr)];
>> +
>> +        if (in6_pton(ctrl->opts->traddr,
>> +                 strlen(ctrl->opts->traddr), buf, -1, NULL) > 0)
>> +            adrfam = NVMF_ADDR_FAMILY_IP6;
>> +    }
>> +
>> +    return adrfam;
>> +}
>> +
>>   static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
>>   {
>>       struct nvmf_ctrl_options *opts = ctrl->opts;
>> @@ -2045,6 +2079,20 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl 
>> *ctrl, bool new)
>>           goto destroy_io;
>>       }
>> +    if (ctrl->opts->explicit_registration &&
>> ++        ((ctrl->dctype == NVME_DCTYPE_DDC) ||
>> +         (ctrl->dctype == NVME_DCTYPE_CDC))) {
>> +        /* We're registering our source address with the DC. To do
>> +         * that, we can simply send an empty string. This tells the DC
>> +         * to retrieve the source address from the socket and use that
>> +         * as the registration address.
>> +         */
>> +        union nvmf_tsas tsas = {.tcp.sectype = NVMF_TCP_SECTYPE_NONE};
>> +
>> +        nvmf_disc_info_mgmt(ctrl, NVME_TAS_REGISTER, NVMF_TRTYPE_TCP,
>> +                    nvme_get_adrfam(ctrl), "", &tsas);
>> +    }
>> +
>>       nvme_start_ctrl(ctrl);
>>       return 0;
>> @@ -2613,7 +2661,8 @@ static struct nvmf_transport_ops 
>> nvme_tcp_transport = {
>>                 NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
>>                 NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST |
>>                 NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
>> -              NVMF_OPT_TOS | NVMF_OPT_HOST_IFACE,
>> +              NVMF_OPT_TOS | NVMF_OPT_HOST_IFACE |
>> +              NVMF_OPT_REGISTER,
>>       .create_ctrl    = nvme_tcp_create_ctrl,
>>   };
>> diff --git a/include/linux/nvme.h b/include/linux/nvme.h
>> index 3b47951342f4..891615876cfa 100644
>> --- a/include/linux/nvme.h
>> +++ b/include/linux/nvme.h
>> @@ -9,6 +9,7 @@
>>   #include <linux/types.h>
>>   #include <linux/uuid.h>
>> +#include <linux/kernel.h>
>>   /* NQN names in commands fields specified one size */
>>   #define NVMF_NQN_FIELD_LEN    256
>> @@ -102,6 +103,16 @@ enum {
>>       NVMF_RDMA_CMS_RDMA_CM    = 1, /* Sockets based endpoint 
>> addressing */
>>   };
>> +/**
>> + * enum -
>> + * @NVMF_TCP_SECTYPE_NONE: No Security
>> + * @NVMF_TCP_SECTYPE_TLS:  Transport Layer Security
>> + */
>> +enum {
>> +    NVMF_TCP_SECTYPE_NONE    = 0,
>> +    NVMF_TCP_SECTYPE_TLS    = 1,
>> +};
>> +
>>   #define NVME_AQ_DEPTH        32
>>   #define NVME_NR_AEN_COMMANDS    1
>>   #define NVME_AQ_BLK_MQ_DEPTH    (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS)
>> @@ -1036,6 +1047,7 @@ enum nvme_admin_opcode {
>>       nvme_admin_sanitize_nvm        = 0x84,
>>       nvme_admin_get_lba_status    = 0x86,
>>       nvme_admin_vendor_start        = 0xC0,
>> +    nvme_admin_dim            = 0x21,
>>   };
>>   #define nvme_admin_opcode_name(opcode)    { opcode, #opcode }
>> @@ -1316,6 +1328,29 @@ struct nvmf_common_command {
>>       __u8    ts[24];
>>   };
>> +/**
>> + * union nvmf_tsas - Transport Specific Address Subtype
>> + * @qptype:  Queue Pair Service Type (NVMF_RDMA_QPTYPE_CONNECTED, 
>> NVMF_RDMA_QPTYPE_DATAGRAM)
>> + * @prtype:  Provider Type (see enum  nvme_rdma_prtype)
>> + * @cma:     Connection Management Service (NVMF_RDMA_CMS_RDMA_CM)
>> + * @pkey:    Partition Key
>> + * @sectype: Security Type (NVMF_TCP_SECTYPE_NONE, NVMF_TCP_SECTYPE_TLS)
>> + */
>> +union nvmf_tsas {
>> +    char        common[NVMF_TSAS_SIZE];
>> +    struct rdma {
>> +        __u8    qptype;
>> +        __u8    prtype;
>> +        __u8    cms;
>> +        __u8    rsvd3[5];
>> +        __u16    pkey;
>> +        __u8    rsvd10[246];
>> +    } rdma;
>> +    struct tcp {
>> +        __u8    sectype;
>> +    } tcp;
>> +};
>> +
>>   /*
>>    * The legal cntlid range a NVMe Target will provide.
>>    * Note that cntlid of value 0 is considered illegal in the fabrics 
>> world.
>> @@ -1349,17 +1384,7 @@ struct nvmf_disc_rsp_page_entry {
>>       __u8        resv64[192];
>>       char        subnqn[NVMF_NQN_FIELD_LEN];
>>       char        traddr[NVMF_TRADDR_SIZE];
>> -    union tsas {
>> -        char        common[NVMF_TSAS_SIZE];
>> -        struct rdma {
>> -            __u8    qptype;
>> -            __u8    prtype;
>> -            __u8    cms;
>> -            __u8    resv3[5];
>> -            __u16    pkey;
>> -            __u8    resv10[246];
>> -        } rdma;
>> -    } tsas;
>> +    union nvmf_tsas tsas;
>>   };
>>   /* Discovery log page header */
>> @@ -1447,6 +1472,213 @@ struct streams_directive_params {
>>       __u8    rsvd2[6];
>>   };
>> +/**
>> + * Discovery Information Management (DIM) command. This is sent by a
>> + * host to the Central Discovery Controller (CDC) to perform
>> + * explicit registration.
>> + */
>> +enum nvmf_dim_tas {
>> +    NVME_TAS_REGISTER   = 0x00,
>> +    NVME_TAS_DEREGISTER = 0x01,
>> +    NVME_TAS_UPDATE     = 0x02,
>> +};
>> +
>> +struct nvmf_dim_command {
>> +    __u8                  opcode;     /* nvme_admin_dim */
>> +    __u8                  flags;
>> +    __u16                 command_id;
>> +    __le32                nsid;
>> +    __u64                 rsvd2[2];
>> +    union nvme_data_ptr   dptr;
>> +    __le32                cdw10;      /* enum nvmf_dim_tas */
>> +    __le32                cdw11;
>> +    __le32                cdw12;
>> +    __le32                cdw13;
>> +    __le32                cdw14;
>> +    __le32                cdw15;
>> +};
>> +
>> +#define NVMF_ENAME_LEN    256
>> +#define NVMF_EVER_LEN     64
>> +
>> +enum nvmf_dim_entfmt {
>> +    NVME_ENTFMT_BASIC    = 0x01,
>> +    NVME_ENTFMT_EXTENDED = 0x02,
>> +};
>> +
>> +enum nvmf_dim_etype {
>> +    NVME_REGISTRATION_HOST = 0x01,
>> +    NVME_REGISTRATION_DDC  = 0x02,
>> +    NVME_REGISTRATION_CDC  = 0x03,
>> +};
>> +
>> +enum nvmf_exattype {
>> +    NVME_EXATTYPE_HOSTID   = 0x01, /* Host Identifier */
>> +    NVME_EXATTYPE_SYMNAME  = 0x02, /* Symbolic Name */
>> +};
>> +
>> +/**
>> + * struct nvmf_ext_attr - Extended Attribute (EXAT)
>> + *
>> + *       Bytes       Description
>> + * @type 01:00       Extended Attribute Type (EXATTYPE) - enum 
>> nvmf_exattype
>> + * @len  03:02       Extended Attribute Length (EXATLEN)
>> + * @val  (EXATLEN-1) Extended Attribute Value (EXATVAL) - size allocated
>> + *       +4:04       for array must be a multiple of 4 bytes
>> + */
>> +struct nvmf_ext_attr {
>> +    __le16            type;
>> +    __le16            len;
>> +    __u8            val[];
>> +};
>> +#define SIZEOF_EXT_EXAT_WO_VAL offsetof(struct nvmf_ext_attr, val)
>> +
>> +/**
>> + * struct nvmf_ext_die - Extended Discovery Information Entry (DIE)
>> + *
>> + *            Bytes           Description
>> + * @trtype    00              Transport Type (enum nvme_trtype)
>> + * @adrfam    01              Address Family (enum nvmf_addr_familiy)
>> + * @subtype   02              Subsystem Type (enum nvme_subsys_type)
>> + * @treq      03              Transport Requirements (enum nvmf_treq)
>> + * @portid    05:04           Port ID
>> + * @cntlid    07:06           Controller ID
>> + * @asqsz     09:08           Admin Max SQ Size
>> + *            31:10           Reserved
>> + * @trsvcid   63:32           Transport Service Identifier
>> + *            255:64          Reserved
>> + * @nqn       511:256         NVM Qualified Name
>> + * @traddr    767:512         Transport Address
>> + * @tsas      1023:768        Transport Specific Address Subtype
>> + * @tel       1027:1024       Total Entry Length
>> + * @numexat   1029:1028       Number of Extended Attributes
>> + *            1031:1030       Reserved
>> + * @exat[0]   ((EXATLEN-1)+   Extented Attributes 0 (see @numexat)
>> + *            4)+1032:1032    Cannot access as an array because each 
>> EXAT
>> + *                            entry has an undetermined size.
>> + * @exat[N]   TEL-1:TEL-      Extented Attributes N (w/ N = NUMEXAT-1)
>> + *            (EXATLEN+4)
>> + */
>> +struct nvmf_ext_die {
>> +    __u8            trtype;
>> +    __u8            adrfam;
>> +    __u8            subtype;
>> +    __u8            treq;
>> +    __le16            portid;
>> +    __le16            cntlid;
>> +    __le16            asqsz;
>> +    __u8            resv10[22];
>> +    char            trsvcid[NVMF_TRSVCID_SIZE];
>> +    __u8            resv64[192];
>> +    char            nqn[NVMF_NQN_FIELD_LEN];
>> +    char            traddr[NVMF_TRADDR_SIZE];
>> +    union nvmf_tsas        tsas;
>> +    __le32            tel;
>> +    __le16            numexat;
>> +    __u16            resv1030;
>> +    struct nvmf_ext_attr    exat;
>> +};
>> +#define SIZEOF_EXT_DIE_WO_EXAT offsetof(struct nvmf_ext_die, exat)
>> +
>> +/**
>> + * union nvmf_die - Discovery Information Entry (DIE)
>> + *
>> + * Depending on the ENTFMT specified in the DIM, DIEs can be entered 
>> with
>> + * the Basic or Extended formats. For Basic format, each entry has a 
>> fixed
>> + * length. Therefore, the "basic" field defined below can be accessed 
>> as a
>> + * C array. For the Extended format, however, each entry is of variable
>> + * length (TEL). Therefore, the "extended" field defined below cannot be
>> + * accessed as a C array. Instead, the "extended" field is akin to a
>> + * linked-list, where one can "walk" through the list. To move to the 
>> next
>> + * entry, one simply adds the current entry's length (TEL) to the "walk"
>> + * pointer. The number of entries in the list is specified by NUMENT.
>> + * Although extended entries are of a variable lengths (TEL), TEL is
>> + * always a multiple of 4 bytes.
>> + */
>> +union nvmf_die {
>> +    struct nvmf_disc_rsp_page_entry    basic[0];
>> +    struct nvmf_ext_die        extended;
>> +};
>> +
>> +/**
>> + * struct nvmf_dim_data - Discovery Information Management (DIM) - Data
>> + *
>> + *            Bytes       Description
>> + * @tdl       03:00       Total Data Length
>> + *            07:04       Reserved
>> + * @nument    15:08       Number of entries
>> + * @entfmt    17:16       Entry Format (enum nvmf_dim_entfmt)
>> + * @etype     19:18       Entity Type (enum nvmf_dim_etype)
>> + * @portlcL   20          Port Local
>> + *            21          Reserved
>> + * @ektype    23:22       Entry Key Type
>> + * @eid       279:24      Entity Identifier (e.g. Host NQN)
>> + * @ename     535:280     Entity Name (e.g. hostname)
>> + * @ever      599:536     Entity Version (e.g. OS Name/Version)
>> + *            1023:600    Reserved
>> + * @die       TDL-1:1024  Discovery Information Entry (see @nument 
>> above)
>> + */
>> +struct nvmf_dim_data {
>> +    __le32            tdl;
>> +    __u32            rsvd4;
>> +    __le64            nument;
>> +    __le16            entfmt;
>> +    __le16            etype;
>> +    __u8            portlcl;
>> +    __u8            rsvd21;
>> +    __le16            ektype;
>> +    char            eid[NVMF_NQN_FIELD_LEN];
>> +    char            ename[NVMF_ENAME_LEN];
>> +    char            ever[NVMF_EVER_LEN];
>> +    __u8            rsvd600[424];
>> +    union nvmf_die        die;
>> +};
>> +#define SIZEOF_DIM_WO_DIE offsetof(struct nvmf_dim_data, die)
>> +
>> +/**
>> + * exat_len() - Return the size in bytes, rounded to a multiple of 4 
>> ((i.e.
>> + *         size of u32), of the buffer needed to hold the exat value of
>> + *         size @val_len.
>> + */
>> +static inline u16 exat_len(size_t val_len)
>> +{
>> +    return (u16)round_up(val_len, sizeof(u32));
>> +}
>> +
>> +/**
>> + * exat_size - return the size of the "struct nvmf_ext_attr"
>> + *   needed to hold a value of size @val_len.
>> + *
>> + * @val_len: This is the length of the data to be copied to the "val"
>> + *         field of a "struct nvmf_ext_attr".
>> + *
>> + * @return The size in bytes, rounded to a multiple of 4 (i.e. size of
>> + *         u32), of the "struct nvmf_ext_attr" required to hold a 
>> string of
>> + *         length @val_len.
>> + */
>> +static inline u16 exat_size(size_t val_len)
>> +{
>> +    return (u16)(SIZEOF_EXT_EXAT_WO_VAL + exat_len(val_len));
>> +}
>> +
>> +/**
>> + * exat_ptr_next - Increment @p to the next element in the array. 
>> Extended
>> + *   attributes are saved to an array of "struct nvmf_ext_attr" where 
>> each
>> + *   element of the array is of variable size. In order to move to 
>> the next
>> + *   element in the array one must increment the pointer to the current
>> + *   element (@p) by the size of the current element.
>> + *
>> + * @p: Pointer to an element of an array of "struct nvmf_ext_attr".
>> + *
>> + * @return Pointer to the next element in the array.
>> + */
>> +static inline struct nvmf_ext_attr *exat_ptr_next(struct 
>> nvmf_ext_attr *p)
>> +{
>> +    return (struct nvmf_ext_attr *)
>> +        ((uintptr_t)p + (ptrdiff_t)exat_size(le16_to_cpu(p->len)));
>> +}
>> +
>> +
>>   struct nvme_command {
>>       union {
>>           struct nvme_common_command common;
>> @@ -1470,6 +1702,7 @@ struct nvme_command {
>>           struct nvmf_property_get_command prop_get;
>>           struct nvme_dbbuf dbbuf;
>>           struct nvme_directive_cmd directive;
>> +        struct nvmf_dim_command dim;
>>       };
>>   };
> I'd rather like to have the TLS bits in a separate patch; they are not 
> strictly related to this issue.
> 
> And I have a rather general issue with this: Is it required to issue an 
> explicit registration directly after sending the 'connect' command?
> IE do we need to have it as part of the 'connect' routine?
> 
> Thing is, the linux 'connect' routine is doing several things in one go
> (like establishing a transport association for the admin queue, send the 
> 'connect' command for the admin queue, create transport associations for 
> I/O queues and send 'connect' commands on all I/O queues, _and_ possibly 
> do authentication, too).
> If we now start overloading it with yet more functionality it becomes 
> extremely hard to cleanup after an error, and that doesn't even mention 
> the non-existing error reporting to userspace.
> 
> Wouldn't it make more sense to delegate explicit registration to 
> userspace (ie libnvme/nvme-cli), and leave the kernel out of it?

It would. There is no reason what-so-ever to place all this register
stuff needs to live in the kernel. We have a way to passthru commands so
it can and should move to userspace.



More information about the Linux-nvme mailing list