[RFC PATCH v4 19/27] qedn: Add IRQ and fast-path resources initializations
Shai Malin
malin1024 at gmail.com
Wed May 5 18:54:57 BST 2021
On 5/2/21 4:32 PM, Hannes Reinecke wrote:
> On 4/29/21 9:09 PM, Shai Malin wrote:
> > This patch introduces qedn_fp_queue - this is a per cpu
> > core element which handles all of the connections on that cpu core.
> > The qedn_fp_queue will handle a group of connections (NVMeoF QPs) which
> > are handled on the same cpu core, and will only use the same FW-driver
> > resources with no need to be related to the same NVMeoF controller.
> >
> > The per qedn_fp_queue resources are the FW CQ and FW status block:
> > - The FW CQ will be used for the FW to notify the driver that
> > the exchange has ended and the FW will pass the incoming NVMeoF CQE
> > (if one exists) to the driver.
> > - FW status block - which is used for the FW to notify the driver with
> > the producer update of the FW CQE chain.
> >
> > The FW fast-path queues are based on qed_chain.h
> >
> > Acked-by: Igor Russkikh <irusskikh at marvell.com>
> > Signed-off-by: Prabhakar Kushwaha <pkushwaha at marvell.com>
> > Signed-off-by: Omkar Kulkarni <okulkarni at marvell.com>
> > Signed-off-by: Michal Kalderon <mkalderon at marvell.com>
> > Signed-off-by: Ariel Elior <aelior at marvell.com>
> > Signed-off-by: Shai Malin <smalin at marvell.com>
> > ---
> > drivers/nvme/hw/qedn/qedn.h | 26 +++
> > drivers/nvme/hw/qedn/qedn_main.c | 287 ++++++++++++++++++++++++++++++-
> > 2 files changed, 310 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/nvme/hw/qedn/qedn.h b/drivers/nvme/hw/qedn/qedn.h
> > index 7efe2366eb7c..5d4d04d144e4 100644
> > --- a/drivers/nvme/hw/qedn/qedn.h
> > +++ b/drivers/nvme/hw/qedn/qedn.h
> > @@ -33,18 +33,41 @@
> > #define QEDN_PROTO_CQ_PROD_IDX 0
> > #define QEDN_NVMETCP_NUM_FW_CONN_QUEUE_PAGES 2
> >
> > +#define QEDN_PAGE_SIZE 4096 /* FW page size - Configurable */
> > +#define QEDN_IRQ_NAME_LEN 24
> > +#define QEDN_IRQ_NO_FLAGS 0
> > +
> > +/* TCP defines */
> > +#define QEDN_TCP_RTO_DEFAULT 280
> > +
> > enum qedn_state {
> > QEDN_STATE_CORE_PROBED = 0,
> > QEDN_STATE_CORE_OPEN,
> > QEDN_STATE_GL_PF_LIST_ADDED,
> > QEDN_STATE_MFW_STATE,
> > + QEDN_STATE_NVMETCP_OPEN,
> > + QEDN_STATE_IRQ_SET,
> > + QEDN_STATE_FP_WORK_THREAD_SET,
> > QEDN_STATE_REGISTERED_OFFLOAD_DEV,
> > QEDN_STATE_MODULE_REMOVE_ONGOING,
> > };
> >
> > +/* Per CPU core params */
> > +struct qedn_fp_queue {
> > + struct qed_chain cq_chain;
> > + u16 *cq_prod;
> > + struct mutex cq_mutex; /* cq handler mutex */
> > + struct qedn_ctx *qedn;
> > + struct qed_sb_info *sb_info;
> > + unsigned int cpu;
> > + u16 sb_id;
> > + char irqname[QEDN_IRQ_NAME_LEN];
> > +};
> > +
> > struct qedn_ctx {
> > struct pci_dev *pdev;
> > struct qed_dev *cdev;
> > + struct qed_int_info int_info;
> > struct qed_dev_nvmetcp_info dev_info;
> > struct nvme_tcp_ofld_dev qedn_ofld_dev;
> > struct qed_pf_params pf_params;
> > @@ -57,6 +80,9 @@ struct qedn_ctx {
> >
> > /* Fast path queues */
> > u8 num_fw_cqs;
> > + struct qedn_fp_queue *fp_q_arr;
> > + struct nvmetcp_glbl_queue_entry *fw_cq_array_virt;
> > + dma_addr_t fw_cq_array_phy; /* Physical address of fw_cq_array_virt */
> > };
> >
> > struct qedn_global {
> > diff --git a/drivers/nvme/hw/qedn/qedn_main.c b/drivers/nvme/hw/qedn/qedn_main.c
> > index 52007d35622d..0135a1f490da 100644
> > --- a/drivers/nvme/hw/qedn/qedn_main.c
> > +++ b/drivers/nvme/hw/qedn/qedn_main.c
> > @@ -141,6 +141,104 @@ static struct nvme_tcp_ofld_ops qedn_ofld_ops = {
> > .commit_rqs = qedn_commit_rqs,
> > };
> >
> > +/* Fastpath IRQ handler */
> > +static irqreturn_t qedn_irq_handler(int irq, void *dev_id)
> > +{
> > + /* Placeholder */
> > +
> > + return IRQ_HANDLED;
> > +}
> > +
> > +static void qedn_sync_free_irqs(struct qedn_ctx *qedn)
> > +{
> > + u16 vector_idx;
> > + int i;
> > +
> > + for (i = 0; i < qedn->num_fw_cqs; i++) {
> > + vector_idx = i * qedn->dev_info.common.num_hwfns +
> > + qed_ops->common->get_affin_hwfn_idx(qedn->cdev);
> > + synchronize_irq(qedn->int_info.msix[vector_idx].vector);
> > + irq_set_affinity_hint(qedn->int_info.msix[vector_idx].vector,
> > + NULL);
> > + free_irq(qedn->int_info.msix[vector_idx].vector,
> > + &qedn->fp_q_arr[i]);
> > + }
> > +
> > + qedn->int_info.used_cnt = 0;
> > + qed_ops->common->set_fp_int(qedn->cdev, 0);
> > +}
> > +
> > +static int qedn_request_msix_irq(struct qedn_ctx *qedn)
> > +{
> > + struct pci_dev *pdev = qedn->pdev;
> > + struct qedn_fp_queue *fp_q = NULL;
> > + int i, rc, cpu;
> > + u16 vector_idx;
> > + u32 vector;
> > +
> > + /* numa-awareness will be added in future enhancements */
> > + cpu = cpumask_first(cpu_online_mask);
> > + for (i = 0; i < qedn->num_fw_cqs; i++) {
> > + fp_q = &qedn->fp_q_arr[i];
> > + vector_idx = i * qedn->dev_info.common.num_hwfns +
> > + qed_ops->common->get_affin_hwfn_idx(qedn->cdev);
> > + vector = qedn->int_info.msix[vector_idx].vector;
> > + sprintf(fp_q->irqname, "qedn_queue_%x.%x.%x_%d",
> > + pdev->bus->number, PCI_SLOT(pdev->devfn),
> > + PCI_FUNC(pdev->devfn), i);
> > + rc = request_irq(vector, qedn_irq_handler, QEDN_IRQ_NO_FLAGS,
> > + fp_q->irqname, fp_q);
> > + if (rc) {
> > + pr_err("request_irq failed.\n");
> > + qedn_sync_free_irqs(qedn);
> > +
> > + return rc;
> > + }
> > +
> > + fp_q->cpu = cpu;
> > + qedn->int_info.used_cnt++;
> > + rc = irq_set_affinity_hint(vector, get_cpu_mask(cpu));
> > + cpu = cpumask_next_wrap(cpu, cpu_online_mask, -1, false);
> > + }
> > +
> > + return 0;
> > +}
> > +
>
> Hah. I knew it.
> So you _do_ have a limited number of MSIx interrupts.
> And that should limit the number of queue pairs, too.
Yes. Thanks!
Will be fixed in the relevant patch in V5.
>
> > +static int qedn_setup_irq(struct qedn_ctx *qedn)
> > +{
> > + int rc = 0;
> > + u8 rval;
> > +
> > + rval = qed_ops->common->set_fp_int(qedn->cdev, qedn->num_fw_cqs);
> > + if (rval < qedn->num_fw_cqs) {
> > + qedn->num_fw_cqs = rval;
> > + if (rval == 0) {
> > + pr_err("set_fp_int return 0 IRQs\n");
> > +
> > + return -ENODEV;
> > + }
> > + }
> > +
> > + rc = qed_ops->common->get_fp_int(qedn->cdev, &qedn->int_info);
> > + if (rc) {
> > + pr_err("get_fp_int failed\n");
> > + goto exit_setup_int;
> > + }
> > +
> > + if (qedn->int_info.msix_cnt) {
> > + rc = qedn_request_msix_irq(qedn);
> > + goto exit_setup_int;
> > + } else {
> > + pr_err("msix_cnt = 0\n");
> > + rc = -EINVAL;
> > + goto exit_setup_int;
> > + }
> > +
> > +exit_setup_int:
> > +
> > + return rc;
> > +}
> > +
> > static inline void qedn_init_pf_struct(struct qedn_ctx *qedn)
> > {
> > /* Placeholder - Initialize qedn fields */
> > @@ -185,21 +283,173 @@ static void qedn_remove_pf_from_gl_list(struct qedn_ctx *qedn)
> > mutex_unlock(&qedn_glb.glb_mutex);
> > }
> >
> > +static void qedn_free_function_queues(struct qedn_ctx *qedn)
> > +{
> > + struct qed_sb_info *sb_info = NULL;
> > + struct qedn_fp_queue *fp_q;
> > + int i;
> > +
> > + /* Free workqueues */
> > +
> > + /* Free the fast path queues*/
> > + for (i = 0; i < qedn->num_fw_cqs; i++) {
> > + fp_q = &qedn->fp_q_arr[i];
> > +
> > + /* Free SB */
> > + sb_info = fp_q->sb_info;
> > + if (sb_info->sb_virt) {
> > + qed_ops->common->sb_release(qedn->cdev, sb_info,
> > + fp_q->sb_id,
> > + QED_SB_TYPE_STORAGE);
> > + dma_free_coherent(&qedn->pdev->dev,
> > + sizeof(*sb_info->sb_virt),
> > + (void *)sb_info->sb_virt,
> > + sb_info->sb_phys);
> > + memset(sb_info, 0, sizeof(*sb_info));
> > + kfree(sb_info);
> > + fp_q->sb_info = NULL;
> > + }
> > +
> > + qed_ops->common->chain_free(qedn->cdev, &fp_q->cq_chain);
> > + }
> > +
> > + if (qedn->fw_cq_array_virt)
> > + dma_free_coherent(&qedn->pdev->dev,
> > + qedn->num_fw_cqs * sizeof(u64),
> > + qedn->fw_cq_array_virt,
> > + qedn->fw_cq_array_phy);
> > + kfree(qedn->fp_q_arr);
> > + qedn->fp_q_arr = NULL;
> > +}
> > +
> > +static int qedn_alloc_and_init_sb(struct qedn_ctx *qedn,
> > + struct qed_sb_info *sb_info, u16 sb_id)
> > +{
> > + int rc = 0;
> > +
> > + sb_info->sb_virt = dma_alloc_coherent(&qedn->pdev->dev,
> > + sizeof(struct status_block_e4),
> > + &sb_info->sb_phys, GFP_KERNEL);
> > + if (!sb_info->sb_virt) {
> > + pr_err("Status block allocation failed\n");
> > +
> > + return -ENOMEM;
> > + }
> > +
> > + rc = qed_ops->common->sb_init(qedn->cdev, sb_info, sb_info->sb_virt,
> > + sb_info->sb_phys, sb_id,
> > + QED_SB_TYPE_STORAGE);
> > + if (rc) {
> > + pr_err("Status block initialization failed\n");
> > +
> > + return rc;
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +static int qedn_alloc_function_queues(struct qedn_ctx *qedn)
> > +{
> > + struct qed_chain_init_params chain_params = {};
> > + struct status_block_e4 *sb = NULL; /* To change to status_block_e4 */
> > + struct qedn_fp_queue *fp_q = NULL;
> > + int rc = 0, arr_size;
> > + u64 cq_phy_addr;
> > + int i;
> > +
> > + /* Place holder - IO-path workqueues */
> > +
> > + qedn->fp_q_arr = kcalloc(qedn->num_fw_cqs,
> > + sizeof(struct qedn_fp_queue), GFP_KERNEL);
> > + if (!qedn->fp_q_arr)
> > + return -ENOMEM;
> > +
> > + arr_size = qedn->num_fw_cqs * sizeof(struct nvmetcp_glbl_queue_entry);
> > + qedn->fw_cq_array_virt = dma_alloc_coherent(&qedn->pdev->dev,
> > + arr_size,
> > + &qedn->fw_cq_array_phy,
> > + GFP_KERNEL);
> > + if (!qedn->fw_cq_array_virt) {
> > + rc = -ENOMEM;
> > + goto mem_alloc_failure;
> > + }
> > +
> > + /* placeholder - create task pools */
> > +
> > + for (i = 0; i < qedn->num_fw_cqs; i++) {
> > + fp_q = &qedn->fp_q_arr[i];
> > + mutex_init(&fp_q->cq_mutex);
> > +
> > + /* FW CQ */
> > + chain_params.intended_use = QED_CHAIN_USE_TO_CONSUME,
> > + chain_params.mode = QED_CHAIN_MODE_PBL,
> > + chain_params.cnt_type = QED_CHAIN_CNT_TYPE_U16,
> > + chain_params.num_elems = QEDN_FW_CQ_SIZE;
> > + chain_params.elem_size = 64; /*Placeholder - sizeof(struct nvmetcp_fw_cqe)*/
> > +
> > + rc = qed_ops->common->chain_alloc(qedn->cdev,
> > + &fp_q->cq_chain,
> > + &chain_params);
> > + if (rc) {
> > + pr_err("CQ chain pci_alloc_consistent fail\n");
> > + goto mem_alloc_failure;
> > + }
> > +
> > + cq_phy_addr = qed_chain_get_pbl_phys(&fp_q->cq_chain);
> > + qedn->fw_cq_array_virt[i].cq_pbl_addr.hi = PTR_HI(cq_phy_addr);
> > + qedn->fw_cq_array_virt[i].cq_pbl_addr.lo = PTR_LO(cq_phy_addr);
> > +
> > + /* SB */
> > + fp_q->sb_info = kzalloc(sizeof(*fp_q->sb_info), GFP_KERNEL);
> > + if (!fp_q->sb_info)
> > + goto mem_alloc_failure;
> > +
> > + fp_q->sb_id = i;
> > + rc = qedn_alloc_and_init_sb(qedn, fp_q->sb_info, fp_q->sb_id);
> > + if (rc) {
> > + pr_err("SB allocation and initialization failed.\n");
> > + goto mem_alloc_failure;
> > + }
> > +
> > + sb = fp_q->sb_info->sb_virt;
> > + fp_q->cq_prod = (u16 *)&sb->pi_array[QEDN_PROTO_CQ_PROD_IDX];
> > + fp_q->qedn = qedn;
> > +
> > + /* Placeholder - Init IO-path workqueue */
> > +
> > + /* Placeholder - Init IO-path resources */
> > + }
> > +
> > + return 0;
> > +
> > +mem_alloc_failure:
> > + pr_err("Function allocation failed\n");
> > + qedn_free_function_queues(qedn);
> > +
> > + return rc;
> > +}
> > +
> > static int qedn_set_nvmetcp_pf_param(struct qedn_ctx *qedn)
> > {
> > u32 fw_conn_queue_pages = QEDN_NVMETCP_NUM_FW_CONN_QUEUE_PAGES;
> > struct qed_nvmetcp_pf_params *pf_params;
> > + int rc;
> >
> > pf_params = &qedn->pf_params.nvmetcp_pf_params;
> > memset(pf_params, 0, sizeof(*pf_params));
> > qedn->num_fw_cqs = min_t(u8, qedn->dev_info.num_cqs, num_online_cpus());
> > + pr_info("Num qedn CPU cores is %u\n", qedn->num_fw_cqs);
> >
> > pf_params->num_cons = QEDN_MAX_CONNS_PER_PF;
> > pf_params->num_tasks = QEDN_MAX_TASKS_PER_PF;
> >
> > - /* Placeholder - Initialize function level queues */
> > + rc = qedn_alloc_function_queues(qedn);
> > + if (rc) {
> > + pr_err("Global queue allocation failed.\n");
> > + goto err_alloc_mem;
> > + }
> >
> > - /* Placeholder - Initialize TCP params */
> > + set_bit(QEDN_STATE_FP_WORK_THREAD_SET, &qedn->state);
> >
> > /* Queues */
> > pf_params->num_sq_pages_in_ring = fw_conn_queue_pages;
> > @@ -207,11 +457,14 @@ static int qedn_set_nvmetcp_pf_param(struct qedn_ctx *qedn)
> > pf_params->num_uhq_pages_in_ring = fw_conn_queue_pages;
> > pf_params->num_queues = qedn->num_fw_cqs;
> > pf_params->cq_num_entries = QEDN_FW_CQ_SIZE;
> > + pf_params->glbl_q_params_addr = qedn->fw_cq_array_phy;
> >
> > /* the CQ SB pi */
> > pf_params->gl_rq_pi = QEDN_PROTO_CQ_PROD_IDX;
> >
> > - return 0;
> > +err_alloc_mem:
> > +
> > + return rc;
> > }
> >
> > static inline int qedn_slowpath_start(struct qedn_ctx *qedn)
> > @@ -255,6 +508,12 @@ static void __qedn_remove(struct pci_dev *pdev)
> > else
> > pr_err("Failed to remove from global PF list\n");
> >
> > + if (test_and_clear_bit(QEDN_STATE_IRQ_SET, &qedn->state))
> > + qedn_sync_free_irqs(qedn);
> > +
> > + if (test_and_clear_bit(QEDN_STATE_NVMETCP_OPEN, &qedn->state))
> > + qed_ops->stop(qedn->cdev);
> > +
> > if (test_and_clear_bit(QEDN_STATE_MFW_STATE, &qedn->state)) {
> > rc = qed_ops->common->update_drv_state(qedn->cdev, false);
> > if (rc)
> > @@ -264,6 +523,9 @@ static void __qedn_remove(struct pci_dev *pdev)
> > if (test_and_clear_bit(QEDN_STATE_CORE_OPEN, &qedn->state))
> > qed_ops->common->slowpath_stop(qedn->cdev);
> >
> > + if (test_and_clear_bit(QEDN_STATE_FP_WORK_THREAD_SET, &qedn->state))
> > + qedn_free_function_queues(qedn);
> > +
> > if (test_and_clear_bit(QEDN_STATE_CORE_PROBED, &qedn->state))
> > qed_ops->common->remove(qedn->cdev);
> >
> > @@ -335,6 +597,25 @@ static int __qedn_probe(struct pci_dev *pdev)
> >
> > set_bit(QEDN_STATE_CORE_OPEN, &qedn->state);
> >
> > + rc = qedn_setup_irq(qedn);
> > + if (rc)
> > + goto exit_probe_and_release_mem;
> > +
> > + set_bit(QEDN_STATE_IRQ_SET, &qedn->state);
> > +
> > + /* NVMeTCP start HW PF */
> > + rc = qed_ops->start(qedn->cdev,
> > + NULL /* Placeholder for FW IO-path resources */,
> > + qedn,
> > + NULL /* Placeholder for FW Event callback */);
> > + if (rc) {
> > + rc = -ENODEV;
> > + pr_err("Cannot start NVMeTCP Function\n");
> > + goto exit_probe_and_release_mem;
> > + }
> > +
> > + set_bit(QEDN_STATE_NVMETCP_OPEN, &qedn->state);
> > +
> > rc = qed_ops->common->update_drv_state(qedn->cdev, true);
> > if (rc) {
> > pr_err("Failed to send drv state to MFW\n");
> >
> So you have a limited number of MSI-x interrupts, but don't limit the
> number of hw queues to that. Why?
Will be fixed in V5.
>
> Cheers,
>
> Hannes
> --
> Dr. Hannes Reinecke Kernel Storage Architect
> hare at suse.de +49 911 74053 688
> SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
> HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer
More information about the Linux-nvme
mailing list