[PATCH v10 08/26] gunyah: rsc_mgr: Add resource manager RPC core
Elliot Berman
quic_eberman at quicinc.com
Fri Feb 24 14:39:32 PST 2023
On 2/23/2023 3:28 PM, Alex Elder wrote:
> On 2/14/23 3:23 PM, Elliot Berman wrote:
>>
<snip>
>> +}
>> +
>> +static int gh_rm_init_connection_payload(struct gh_rm_connection
>> *connection, void *msg,
>> + size_t hdr_size, size_t msg_size)
>
> The value of hdr_size is *always* sizeof(*hdr), so you can
> do without passing it as an argument.
>
hdr_size is different when receiving a reply (the reply header has 1 extra word) vs. a notification.
>> +{
>> + size_t max_buf_size, payload_size;
>> + struct gh_rm_rpc_hdr *hdr = msg;
>> +
>
> It probably sounds dumb, but I'd reverse the values
> compared below (and the operator).
>
>> + if (hdr_size > msg_size)
>> + return -EINVAL;
>> +
>> + payload_size = msg_size - hdr_size;
>> +
>> + connection->num_fragments = FIELD_GET(RM_RPC_FRAGMENTS_MASK,
>> hdr->type);
>> + connection->fragments_received = 0;
>> +
>> + /* There's not going to be any payload, no need to allocate
>> buffer. */
>> + if (!payload_size && !connection->num_fragments)
>
> The payload size is the same across all messages in the
> "connection" right? As is the number of fragments?
> It's not even possible/valid to have a zero payload size
> and non-zero number of fragments. I think the second
> half of the above test can be dropped.
>
The RM RPC specification doesn't require that the first message have a
payload. (It makes sense for the first message to carry one, and in practice
it does, but that's an implementation detail.)
>> + return 0;
>> +
>> + if (connection->num_fragments > GH_RM_MAX_NUM_FRAGMENTS)
>> + return -EINVAL;
>> +
>> + max_buf_size = payload_size + (connection->num_fragments *
>> GH_RM_MAX_MSG_SIZE);
>> +
>> + connection->payload = kzalloc(max_buf_size, GFP_KERNEL);
>> + if (!connection->payload)
>> + return -ENOMEM;
>> +
>> + memcpy(connection->payload, msg + hdr_size, payload_size);
>
> I think I suggested (hdr + 1) rather than (msg + size) elsewhere
> and you took that suggestion. I'd say do it one way or the other,
> consistently, everywhere.
>
hdr_size != sizeof(*hdr) when we receive a reply message.
>> + connection->size = payload_size;
>> + return 0;
>> +}
>> +
>> +static void gh_rm_notif_work(struct work_struct *work)
>> +{
>> + struct gh_rm_connection *connection = container_of(work, struct
>> gh_rm_connection,
>> + notification.work);
>> + struct gh_rm *rm = connection->notification.rm;
>> +
>> + blocking_notifier_call_chain(&rm->nh, connection->msg_id,
>> connection->payload);
>> +
>> + put_gh_rm(rm);
>> + kfree(connection->payload);
>> + kfree(connection);
>> +}
>> +
>> +static struct gh_rm_connection *gh_rm_process_notif(struct gh_rm *rm,
>> void *msg, size_t msg_size)
>
> I think it might be better if you do some of what the caller
> does here. I.e., verify the current connection is null (and
> abort if not and make it NULL), then assign it to the new
> connection before you return success. And return an errno.
>
Since you and Srini both suggest doing it, I'll cave. :-)
>> +{
>> + struct gh_rm_connection *connection;
>> + struct gh_rm_rpc_hdr *hdr = msg;
>> + int ret;
>> +
>> + connection = gh_rm_alloc_connection(hdr->msg_id, RM_RPC_TYPE_NOTIF);
>> + if (IS_ERR(connection)) {
>> + dev_err(rm->dev, "Failed to alloc connection for
>> notification: %ld, dropping.\n",
>> + PTR_ERR(connection));
>> + return NULL;
>> + }
>> +
>> + get_gh_rm(rm);
>> + connection->notification.rm = rm;
>> + INIT_WORK(&connection->notification.work, gh_rm_notif_work);
>> +
>> + ret = gh_rm_init_connection_payload(connection, msg,
>> sizeof(*hdr), msg_size);
>> + if (ret) {
>> + dev_err(rm->dev, "Failed to initialize connection buffer for
>> notification: %d\n",
>> + ret);
>> + kfree(connection);
>> + return NULL;
>> + }
>> +
>> + return connection;
>> +}
>> +
>> +static struct gh_rm_connection *gh_rm_process_rply(struct gh_rm *rm,
>> void *msg, size_t msg_size)
>> +{
>
> Here too, make sure there is no active connection and then
> set it within this function if the errno returned is 0.
>
>> + struct gh_rm_rpc_reply_hdr *reply_hdr = msg;
>> + struct gh_rm_connection *connection;
>> + u16 seq_id = le16_to_cpu(reply_hdr->hdr.seq);
>> +
>> + mutex_lock(&rm->call_idr_lock);
>> + connection = idr_find(&rm->call_idr, seq_id);
>> + mutex_unlock(&rm->call_idr_lock);
>> +
>> + if (!connection || connection->msg_id != reply_hdr->hdr.msg_id)
>> + return NULL;
>> +
>> + if (gh_rm_init_connection_payload(connection, msg,
>> sizeof(*reply_hdr), msg_size)) {
>> + dev_err(rm->dev, "Failed to alloc connection buffer for
>> sequence %d\n", seq_id);
>> + /* Send connection complete and error the client. */
>> + connection->reply.ret = -ENOMEM;
>> + complete(&connection->reply.seq_done);
>> + return NULL;
>> + }
>> +
>> + connection->reply.rm_error = le32_to_cpu(reply_hdr->err_code);
>> + return connection;
>> +}
>> +
>> +static int gh_rm_process_cont(struct gh_rm *rm, struct
>> gh_rm_connection *connection,
>> + void *msg, size_t msg_size)
>
> Similar comment here. Have this function verify there is
> a non-null active connection. Then process the message
> and abort if there's an error (and null the active connection
> pointer).
>
>> +{
>> + struct gh_rm_rpc_hdr *hdr = msg;
>> + size_t payload_size = msg_size - sizeof(*hdr);
>> +
>> + /*
>> + * hdr->fragments and hdr->msg_id preserves the value from first
>> reply
>> + * or notif message. To detect mishandling, check it's still intact.
>> + */
>> + if (connection->msg_id != hdr->msg_id ||
>> + connection->num_fragments != FIELD_GET(RM_RPC_FRAGMENTS_MASK,
>> hdr->type))
>> + return -EINVAL;
>
> Maybe -EBADMSG?
>
>> +
>> + memcpy(connection->payload + connection->size, msg +
>> sizeof(*hdr), payload_size);
>> + connection->size += payload_size;
>> + connection->fragments_received++;
>> + return 0;
>> +}
>> +
>> +static void gh_rm_abort_connection(struct gh_rm_connection *connection)
>> +{
>> + switch (connection->type) {
>> + case RM_RPC_TYPE_REPLY:
>> + connection->reply.ret = -EIO;
>> + complete(&connection->reply.seq_done);
>> + break;
>> + case RM_RPC_TYPE_NOTIF:
>> + fallthrough;
>> + default:
>> + kfree(connection->payload);
>> + kfree(connection);
>> + }
>> +}
>> +
>> +static bool gh_rm_complete_connection(struct gh_rm *rm, struct
>> gh_rm_connection *connection)
>
> The only caller of this function passes rm->active_rx_connection
> as the second argument. It is available to you here, so you
> can get rid of that argument.
>
>> +{
>> + if (!connection || connection->fragments_received !=
>> connection->num_fragments)
>> + return false;
>> +
>> + switch (connection->type) {
>> + case RM_RPC_TYPE_REPLY:
>> + complete(&connection->reply.seq_done);
>> + break;
>> + case RM_RPC_TYPE_NOTIF:
>> + schedule_work(&connection->notification.work);
>> + break;
>> + default:
>> + dev_err(rm->dev, "Invalid message type (%d) received\n",
>> connection->type);
>> + gh_rm_abort_connection(connection);
>> + break;
>> + }
>> +
>> + return true;
>> +}
>> +
>> +static void gh_rm_msgq_rx_data(struct mbox_client *cl, void *mssg)
>> +{
>> + struct gh_rm *rm = container_of(cl, struct gh_rm, msgq_client);
>> + struct gh_msgq_rx_data *rx_data = mssg;
>> + size_t msg_size = rx_data->length;
>> + void *msg = rx_data->data;
>> + struct gh_rm_rpc_hdr *hdr;
>> +
>
> Is it required that at least one byte (past the header) will
> be received? I.e., should the "<=" below just be "<"?
>
>> + if (msg_size <= sizeof(*hdr) || msg_size > GH_MSGQ_MAX_MSG_SIZE)
>> + return;
>
> You previously reported a message here. These seem like
> errors, which if they occur, maybe should be reported.
> They seem like "never happen" issues, but it's defensive
> to make these checks (which is good).
>
>> +
>> + hdr = msg;
>> + if (hdr->api != RM_RPC_API) {
>
> If this ever happens, is the hardware failing? It seems
> like once Gunyah is initialized and you've checked the
> API version once, there should be no need to check it
> repeatedly.
I'd need to check the API version for the first message. On subsequent
messages, I'd need to check if I already checked. Might as well just
check the version every time?
<done for all the comments snipped>
>> +
>> +void get_gh_rm(struct gh_rm *rm)
>
> It is often pretty handy to return the argument in
> functions like this. It simultaneously takes the
> reference and assigns the pointer the reference
> represents.
>
>
I've updated this so that gh_rm_get() returns a struct device * (the
miscdev's device). Is this too unusual?
>> +{
>> + get_device(rm->dev);
>> +}
>> +EXPORT_SYMBOL_GPL(get_gh_rm);
>> +
>> +void put_gh_rm(struct gh_rm *rm)
>> +{
>> + put_device(rm->dev);
>> +}
>> +EXPORT_SYMBOL_GPL(put_gh_rm);
>> +
>> +static int gh_msgq_platform_probe_direction(struct platform_device
>> *pdev,
>> + bool tx, int idx, struct gunyah_resource *ghrsc)
>> +{
>> + struct device_node *node = pdev->dev.of_node;
>> + int ret;
>
> I think you should declare idx as a local variable.
>
> int idx = tx ? 1 : 0;
>> +
>> + ghrsc->type = tx ? GUNYAH_RESOURCE_TYPE_MSGQ_TX :
>> GUNYAH_RESOURCE_TYPE_MSGQ_RX;
>> +
>> + ghrsc->irq = platform_get_irq(pdev, idx);
>
> Do you suppose you could do platform_get_irq_byname(), and then
> specify the names of the IRQs ("rm_tx_irq" and "rm_rx_irq" maybe)?
>
>> + if (ghrsc->irq < 0) {
>> + dev_err(&pdev->dev, "Failed to get irq%d: %d\n", idx,
>> ghrsc->irq);
>
> Maybe: "Failed to get %cX IRQ: %d\n", tx ? 'T' : 'R', ghrsc->irq);
>
>> + return ghrsc->irq;
>> + }
>> +
>> + ret = of_property_read_u64_index(node, "reg", idx, &ghrsc->capid);
>
> Is a capability ID a simple (but large) number?
>
> The *resource manager* (which is a very special VM) has to
> have both TX and RX message queue capability IDs. Is there
> any chance that these specific capability IDs have values
> that are fixed by the design? Like, 0 and 1? I don't know
> what they are, but it seems like it *could* be something
> fixed by the design, and if that were the case, there would
> be no need to specify the "reg" property to get the "capid"
> values.
>
They aren't fixed by the design in a production version of Gunyah.
>> + if (ret) {
>> + dev_err(&pdev->dev, "Failed to get capid%d: %d\n", idx, ret);
>> + return ret;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static int gh_rm_drv_probe(struct platform_device *pdev)
>> +{
>> + struct gh_msgq_tx_data *msg;
>> + struct gh_rm *rm;
>> + int ret;
>> +
>> + rm = devm_kzalloc(&pdev->dev, sizeof(*rm), GFP_KERNEL);
>> + if (!rm)
>> + return -ENOMEM;
>> +
>> + platform_set_drvdata(pdev, rm);
>> + rm->dev = &pdev->dev;
>> +
>> + mutex_init(&rm->call_idr_lock);
>> + idr_init(&rm->call_idr);
>> + rm->cache = kmem_cache_create("gh_rm", struct_size(msg, data,
>> GH_MSGQ_MAX_MSG_SIZE), 0,
>> + SLAB_HWCACHE_ALIGN, NULL);
>> + if (!rm->cache)
>> + return -ENOMEM;
>
> If you abstracted the allocation interface for these messages,
> you could actually survive without the slab cache here. But
> if this fails, maybe you won't get far anyway.
>
>> + mutex_init(&rm->send_lock);
>> + BLOCKING_INIT_NOTIFIER_HEAD(&rm->nh);
>> +
>> + ret = gh_msgq_platform_probe_direction(pdev, true, 0,
>> &rm->tx_ghrsc);
>> + if (ret)
>> + goto err_cache;
>> +
>> + ret = gh_msgq_platform_probe_direction(pdev, false, 1,
>> &rm->rx_ghrsc);
>> + if (ret)
>> + goto err_cache;
>> +
>> + rm->msgq_client.dev = &pdev->dev;
>> + rm->msgq_client.tx_block = true;
>> + rm->msgq_client.rx_callback = gh_rm_msgq_rx_data;
>> + rm->msgq_client.tx_done = gh_rm_msgq_tx_done;
>> +
>> + return gh_msgq_init(&pdev->dev, &rm->msgq, &rm->msgq_client,
>> &rm->tx_ghrsc, &rm->rx_ghrsc);
>> +err_cache:
>> + kmem_cache_destroy(rm->cache);
>> + return ret;
>> +}
>> +
>> +static int gh_rm_drv_remove(struct platform_device *pdev)
>> +{
>> + struct gh_rm *rm = platform_get_drvdata(pdev);
>> +
>> + mbox_free_channel(gh_msgq_chan(&rm->msgq));
>> + gh_msgq_remove(&rm->msgq);
>> + kmem_cache_destroy(rm->cache);
>> +
>> + return 0;
>> +}
>> +
>> +static const struct of_device_id gh_rm_of_match[] = {
>> + { .compatible = "gunyah-resource-manager" },
>> + {}
>> +};
>> +MODULE_DEVICE_TABLE(of, gh_rm_of_match);
>> +
>> +static struct platform_driver gh_rm_driver = {
>> + .probe = gh_rm_drv_probe,
>> + .remove = gh_rm_drv_remove,
>> + .driver = {
>> + .name = "gh_rsc_mgr",
>> + .of_match_table = gh_rm_of_match,
>> + },
>> +};
>> +module_platform_driver(gh_rm_driver);
>> +
>> +MODULE_LICENSE("GPL");
>> +MODULE_DESCRIPTION("Gunyah Resource Manager Driver");
>> diff --git a/drivers/virt/gunyah/rsc_mgr.h
>> b/drivers/virt/gunyah/rsc_mgr.h
>> new file mode 100644
>> index 000000000000..d4e799a7526f
>> --- /dev/null
>> +++ b/drivers/virt/gunyah/rsc_mgr.h
>> @@ -0,0 +1,77 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +/*
>> + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All
>> rights reserved.
>> + */
>> +#ifndef __GH_RSC_MGR_PRIV_H
>> +#define __GH_RSC_MGR_PRIV_H
>> +
>> +#include <linux/gunyah.h>
>> +#include <linux/gunyah_rsc_mgr.h>
>> +#include <linux/types.h>
>> +
>> +/* RM Error codes */
>> +enum gh_rm_error {
>> + GH_RM_ERROR_OK = 0x0,
>> + GH_RM_ERROR_UNIMPLEMENTED = 0xFFFFFFFF,
>> + GH_RM_ERROR_NOMEM = 0x1,
>> + GH_RM_ERROR_NORESOURCE = 0x2,
>> + GH_RM_ERROR_DENIED = 0x3,
>> + GH_RM_ERROR_INVALID = 0x4,
>> + GH_RM_ERROR_BUSY = 0x5,
>> + GH_RM_ERROR_ARGUMENT_INVALID = 0x6,
>> + GH_RM_ERROR_HANDLE_INVALID = 0x7,
>> + GH_RM_ERROR_VALIDATE_FAILED = 0x8,
>> + GH_RM_ERROR_MAP_FAILED = 0x9,
>> + GH_RM_ERROR_MEM_INVALID = 0xA,
>> + GH_RM_ERROR_MEM_INUSE = 0xB,
>> + GH_RM_ERROR_MEM_RELEASED = 0xC,
>> + GH_RM_ERROR_VMID_INVALID = 0xD,
>> + GH_RM_ERROR_LOOKUP_FAILED = 0xE,
>> + GH_RM_ERROR_IRQ_INVALID = 0xF,
>> + GH_RM_ERROR_IRQ_INUSE = 0x10,
>> + GH_RM_ERROR_IRQ_RELEASED = 0x11,
>> +};
>> +
>> +/**
>> + * gh_rm_remap_error() - Remap Gunyah resource manager errors into a
>> Linux error code
>> + * @gh_error: "Standard" return value from Gunyah resource manager
>> + */
>> +static inline int gh_rm_remap_error(enum gh_rm_error rm_error)
>> +{
>> + switch (rm_error) {
>> + case GH_RM_ERROR_OK:
>> + return 0;
>> + case GH_RM_ERROR_UNIMPLEMENTED:
>> + return -EOPNOTSUPP;
>> + case GH_RM_ERROR_NOMEM:
>> + return -ENOMEM;
>> + case GH_RM_ERROR_NORESOURCE:
>> + return -ENODEV;
>> + case GH_RM_ERROR_DENIED:
>> + return -EPERM;
>> + case GH_RM_ERROR_BUSY:
>> + return -EBUSY;
>> + case GH_RM_ERROR_INVALID:
>> + case GH_RM_ERROR_ARGUMENT_INVALID:
>> + case GH_RM_ERROR_HANDLE_INVALID:
>> + case GH_RM_ERROR_VALIDATE_FAILED:
>> + case GH_RM_ERROR_MAP_FAILED:
>> + case GH_RM_ERROR_MEM_INVALID:
>> + case GH_RM_ERROR_MEM_INUSE:
>> + case GH_RM_ERROR_MEM_RELEASED:
>> + case GH_RM_ERROR_VMID_INVALID:
>> + case GH_RM_ERROR_LOOKUP_FAILED:
>> + case GH_RM_ERROR_IRQ_INVALID:
>> + case GH_RM_ERROR_IRQ_INUSE:
>> + case GH_RM_ERROR_IRQ_RELEASED:
>> + return -EINVAL;
>> + default:
>> + return -EBADMSG;
>> + }
>> +}
>> +
>> +struct gh_rm;
>
> This might just be my preference, but I like to see declarations
> like the one above grouped at the top of the file, under includes.
>
>> +int gh_rm_call(struct gh_rm *rsc_mgr, u32 message_id, void *req_buff,
>> size_t req_buff_size,
>> + void **resp_buf, size_t *resp_buff_size);
>> +
>> +#endif
>> diff --git a/include/linux/gunyah_rsc_mgr.h
>> b/include/linux/gunyah_rsc_mgr.h
>> new file mode 100644
>> index 000000000000..c992b3188c8d
>> --- /dev/null
>> +++ b/include/linux/gunyah_rsc_mgr.h
>> @@ -0,0 +1,24 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +/*
>> + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All
>> rights reserved.
>> + */
>> +
>> +#ifndef _GUNYAH_RSC_MGR_H
>> +#define _GUNYAH_RSC_MGR_H
>> +
>> +#include <linux/list.h>
>> +#include <linux/notifier.h>
>> +#include <linux/gunyah.h>
>> +
>> +#define GH_VMID_INVAL U16_MAX
>> +
>> +/* Gunyah recognizes VMID0 as an alias to the current VM's ID */
>> +#define GH_VMID_SELF 0
>
> I haven't really checked very well, but you should *use this*
> definition where a VMID is being examined. I.e., if you're
> going to define this, then never just compare a VMID against 0.
>
I realize now that the only place I *could* use GH_VMID_SELF is the one
exception to the usual VMID usage -- gh_rm_vmid_alloc. There, a vmid of 0
means "use dynamic allocation". Since there aren't any users of
GH_VMID_SELF, I'll drop it.
Thanks,
Elliot
> -Alex
>
>> +
>> +struct gh_rm;
>> +int gh_rm_notifier_register(struct gh_rm *rm, struct notifier_block
>> *nb);
>> +int gh_rm_notifier_unregister(struct gh_rm *rm, struct notifier_block
>> *nb);
>> +void get_gh_rm(struct gh_rm *rm);
>> +void put_gh_rm(struct gh_rm *rm);
>> +
>> +#endif
>
More information about the linux-arm-kernel
mailing list