[RFC PATCH 6/6] block: implement NVMEM provider

Daniel Golle daniel at makrotopia.org
Wed Jul 19 17:14:53 PDT 2023


On Thu, Jul 20, 2023 at 08:04:56AM +0900, Damien Le Moal wrote:
> On 7/20/23 07:04, Daniel Golle wrote:
> > On embedded devices using an eMMC it is common that one or more partitions
> > on the eMMC are used to store MAC addresses and Wi-Fi calibration EEPROM
> > data. Allow referencing the partition in device tree for the kernel and
> > Wi-Fi drivers accessing it via the NVMEM layer.
> > 
> > Signed-off-by: Daniel Golle <daniel at makrotopia.org>
> > ---
> >  block/Kconfig           |   8 ++
> >  block/Makefile          |   1 +
> >  block/blk-nvmem.c       | 187 ++++++++++++++++++++++++++++++++++++++++
> >  block/blk.h             |  13 +++
> >  block/genhd.c           |   2 +
> >  block/partitions/core.c |   2 +
> >  6 files changed, 213 insertions(+)
> >  create mode 100644 block/blk-nvmem.c
> > 
> > diff --git a/block/Kconfig b/block/Kconfig
> > index 86122e459fe04..185573877964d 100644
> > --- a/block/Kconfig
> > +++ b/block/Kconfig
> > @@ -218,6 +218,14 @@ config BLK_MQ_VIRTIO
> >  config BLK_PM
> >  	def_bool PM
> >  
> > +config BLK_NVMEM
> > +	bool "Block device NVMEM provider"
> > +	depends on OF
> > +	help
> > +	  Allow block devices (or partitions) to act as NVMEM prodivers,
> > +	  typically using if an eMMC is used to store MAC addresses or Wi-Fi
> 
> Odd grammar... Maybe "typically used with eMMC to store ..."

Thank you, I'll use your suggestion.

> 
> > +	  calibration data on embedded devices.
> > +
> >  # do not use in new code
> >  config BLOCK_HOLDER_DEPRECATED
> >  	bool
> > diff --git a/block/Makefile b/block/Makefile
> > index 46ada9dc8bbfe..03c0bfa8642df 100644
> > --- a/block/Makefile
> > +++ b/block/Makefile
> > @@ -34,6 +34,7 @@ obj-$(CONFIG_BLK_DEV_ZONED)	+= blk-zoned.o
> >  obj-$(CONFIG_BLK_WBT)		+= blk-wbt.o
> >  obj-$(CONFIG_BLK_DEBUG_FS)	+= blk-mq-debugfs.o
> >  obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o
> > +obj-$(CONFIG_BLK_NVMEM)		+= blk-nvmem.o
> >  obj-$(CONFIG_BLK_SED_OPAL)	+= sed-opal.o
> >  obj-$(CONFIG_BLK_PM)		+= blk-pm.o
> >  obj-$(CONFIG_BLK_INLINE_ENCRYPTION)	+= blk-crypto.o blk-crypto-profile.o \
> > diff --git a/block/blk-nvmem.c b/block/blk-nvmem.c
> > new file mode 100644
> > index 0000000000000..8238511049f56
> > --- /dev/null
> > +++ b/block/blk-nvmem.c
> > @@ -0,0 +1,187 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
> > +/*
> > + * block device NVMEM provider
> > + *
> > + * Copyright (c) 2023 Daniel Golle <daniel at makrotopia.org>
> > + *
> > + * Useful on devices using a partition on an eMMC for MAC addresses or
> > + * Wi-Fi calibration EEPROM data.
> > + */
> > +
> > +#include "blk.h"
> > +#include <linux/nvmem-provider.h>
> > +#include <linux/of.h>
> > +#include <linux/pagemap.h>
> > +#include <linux/property.h>
> > +
> > +/* List of all NVMEM devices */
> > +static LIST_HEAD(nvmem_devices);
> > +static DEFINE_MUTEX(devices_mutex);
> > +
> > +struct blk_nvmem {
> > +	struct nvmem_device *nvmem;
> > +	struct block_device *bdev;
> > +	struct list_head list;
> > +};
> > +
> > +static int blk_nvmem_reg_read(void *priv, unsigned int from,
> > +			      void *val, size_t bytes)
> > +{
> > +	pgoff_t f_index = from >> PAGE_SHIFT;
> > +	struct address_space *mapping;
> > +	struct blk_nvmem *bnv = priv;
> 
> Why not have bnv passed as argument directly ?

Because blk_nvmem_reg_read is used as the reg_read callback, i.e. it has
to match the type nvmem_reg_read_t
{aka 'int (*)(void *, unsigned int, void *, long unsigned int)'}.

Hence the 'void *' has to be implicitly converted to 'struct blk_nvmem *'.

> 
> > +	size_t bytes_left = bytes;
> > +	struct folio *folio;
> > +	unsigned long offs, to_read;
> > +	void *p;
> > +
> > +	if (!bnv->bdev)
> > +		return -ENODEV;
> > +
> > +	offs = from & ((1 << PAGE_SHIFT) - 1);
> 
> offs = from & PAGE_MASK;

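Right, that makes it easier... Note that it has to be
'offs = from & ~PAGE_MASK;' (or simply 'offs = offset_in_page(from);')
though, as PAGE_MASK itself selects the page-aligned part of the address
rather than the offset within the page.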

> 
> from being an int is really odd though.

Yep, but that's how the NVMEM framework defines it.

> 
> > +	mapping = bnv->bdev->bd_inode->i_mapping;
> > +
> > +	while (bytes_left) {
> > +		folio = read_mapping_folio(mapping, f_index++, NULL);
> > +		if (IS_ERR(folio))
> > +			return PTR_ERR(folio);
> > +
> > +		to_read = min_t(unsigned long, bytes_left, PAGE_SIZE - offs);
> > +		p = folio_address(folio) + offset_in_folio(folio, offs);
> > +		memcpy(val, p, to_read);
> > +		offs = 0;
> > +		bytes_left -= to_read;
> > +		val += to_read;
> > +		folio_put(folio);
> > +	}
> > +
> > +	return bytes_left == 0 ? 0 : -EIO;
> 
> How can bytes_left be anything but 0 here, given the above loop has no break ?

Well, right... Can just be 'return 0;' at this point obviously.

> 
> > +}
> > +
> > +void blk_register_nvmem(struct block_device *bdev)
> > +{
> > +	struct fwnode_handle *fw_parts = NULL, *fw_part_c, *fw_part = NULL;
> > +	struct nvmem_config config = {};
> > +	const char *partname, *uuid;
> > +	struct device *dev, *p0dev;
> > +	struct blk_nvmem *bnv;
> > +	u32 reg;
> > +
> > +	/*
> > +	 * skip devices which set GENHD_FL_NO_NVMEM
> > +	 *
> > +	 * This flag is used for mtdblock and ubiblock devices because
> > +	 * both, MTD and UBI already implement their own NVMEM provider.
> > +	 * To avoid registering multiple NVMEM providers for the same
> > +	 * device node, skip the block NVMEM provider.
> > +	 */
> > +	if (bdev->bd_disk->flags & GENHD_FL_NO_NVMEM)
> > +		return;
> > +
> > +	/* skip too large devices */
> 
> Why ? Is that defined in some standards somewhere ?

The NVMEM framework uses 'int' to address byte offsets inside NVMEM devices.
Hence devices larger than INT_MAX bytes cannot be addressed. I will also
state that in the comment.

> 
> > +	if (bdev_nr_bytes(bdev) > INT_MAX)
> > +		return;
> > +
> > +	dev = &bdev->bd_device;
> > +	if (!bdev_is_partition(bdev)) {
> > +		fw_part = dev->fwnode;
> > +
> > +		if (!fw_part && dev->parent)
> > +			fw_part = dev->parent->fwnode;
> > +
> > +		goto no_parts;
> > +	}
> > +
> > +	p0dev = &bdev->bd_disk->part0->bd_device;
> > +	fw_parts = device_get_named_child_node(p0dev, "partitions");
> > +	if (!fw_parts)
> > +		fw_parts = device_get_named_child_node(p0dev->parent, "partitions");
> > +
> > +	if (!fw_parts)
> > +		return;
> > +
> > +	fwnode_for_each_child_node(fw_parts, fw_part_c) {
> > +		if (!fwnode_property_read_string(fw_part_c, "uuid", &uuid) &&
> > +		    (!bdev->bd_meta_info || strncmp(uuid,
> > +						    bdev->bd_meta_info->uuid,
> > +						    PARTITION_META_INFO_UUIDLTH)))
> > +			continue;
> > +
> > +		if (!fwnode_property_read_string(fw_part_c, "partname", &partname) &&
> > +		    (!bdev->bd_meta_info || strncmp(partname,
> > +						    bdev->bd_meta_info->volname,
> > +						    PARTITION_META_INFO_VOLNAMELTH)))
> > +			continue;
> > +
> > +		/*
> > +		 * partition addresses (reg) in device tree greater than
> > +		 * DISK_MAX_PARTS can be used to match uuid or partname only
> > +		 */
> > +		if (!fwnode_property_read_u32(fw_part_c, "reg", &reg) &&
> > +		    reg < DISK_MAX_PARTS && bdev->bd_partno != reg)
> > +			continue;
> > +
> > +		fw_part = fw_part_c;
> > +		break;
> > +	}
> > +
> > +no_parts:
> > +	if (!fwnode_device_is_compatible(fw_part, "nvmem-cells"))
> > +		return;
> > +
> > +	bnv = kzalloc(sizeof(struct blk_nvmem), GFP_KERNEL);
> > +	if (!bnv)
> > +		return;
> > +
> > +	config.id = NVMEM_DEVID_NONE;
> > +	config.dev = &bdev->bd_device;
> > +	config.name = dev_name(&bdev->bd_device);
> > +	config.owner = THIS_MODULE;
> > +	config.priv = bnv;
> > +	config.reg_read = blk_nvmem_reg_read;
> > +	config.size = bdev_nr_bytes(bdev);
> > +	config.word_size = 1;
> > +	config.stride = 1;
> > +	config.read_only = true;
> > +	config.root_only = true;
> > +	config.ignore_wp = true;
> > +	config.of_node = to_of_node(fw_part);
> > +
> > +	bnv->bdev = bdev;
> > +	bnv->nvmem = nvmem_register(&config);
> > +	if (IS_ERR(bnv->nvmem)) {
> > +		/* Just ignore if there is no NVMEM support in the kernel */
> 
> If there is not, why would this function even be called ?

True. A Kconfig dependency should make sure blk-nvmem is only built if
NVMEM is supported at all.

> 
> > +		if (PTR_ERR(bnv->nvmem) != -EOPNOTSUPP)
> > +			dev_err_probe(&bdev->bd_device, PTR_ERR(bnv->nvmem),
> > +				      "Failed to register NVMEM device\n");
> > +
> > +		kfree(bnv);
> > +		return;
> > +	}
> > +
> > +	mutex_lock(&devices_mutex);
> > +	list_add_tail(&bnv->list, &nvmem_devices);
> > +	mutex_unlock(&devices_mutex);
> > +}
> > +
> > +void blk_unregister_nvmem(struct block_device *bdev)
> > +{
> > +	struct blk_nvmem *bnv_c, *bnv = NULL;
> > +
> > +	mutex_lock(&devices_mutex);
> > +	list_for_each_entry(bnv_c, &nvmem_devices, list)
> > +		if (bnv_c->bdev == bdev) {
> > +			bnv = bnv_c;
> > +			break;
> > +		}
> 
> Curly brackets for list_for_each_entry() {} would be nice, even though they are
> not strictly necessary in this case.

Ack, will add them.

> 
> > +
> > +	if (!bnv) {
> > +		mutex_unlock(&devices_mutex);
> > +		return;
> > +	}
> > +
> > +	list_del(&bnv->list);
> > +	mutex_unlock(&devices_mutex);
> > +	nvmem_unregister(bnv->nvmem);
> > +	kfree(bnv);
> > +}
> > diff --git a/block/blk.h b/block/blk.h
> > index 686712e138352..7423d0d5494e9 100644
> > --- a/block/blk.h
> > +++ b/block/blk.h
> > @@ -515,4 +515,17 @@ static inline int req_ref_read(struct request *req)
> >  	return atomic_read(&req->ref);
> >  }
> >  
> > +#ifdef CONFIG_BLK_NVMEM
> > +void blk_register_nvmem(struct block_device *bdev);
> > +void blk_unregister_nvmem(struct block_device *bdev);
> > +#else
> > +static inline void blk_register_nvmem(struct block_device *bdev)
> > +{
> > +}
> 
> These could go at the end of the static inline line.

I tried keeping the existing style in that header file.
See a couple of lines above:
static inline void bio_integrity_free(struct bio *bio)
{
}

> 
> > +
> > +static inline void blk_unregister_nvmem(struct block_device *bdev)
> > +{
> > +}
> > +#endif
> > +
> >  #endif /* BLK_INTERNAL_H */
> > diff --git a/block/genhd.c b/block/genhd.c
> > index 3d287b32d50df..b306e0f407bb2 100644
> > --- a/block/genhd.c
> > +++ b/block/genhd.c
> > @@ -527,6 +527,7 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
> >  	disk_update_readahead(disk);
> >  	disk_add_events(disk);
> >  	set_bit(GD_ADDED, &disk->state);
> > +	blk_register_nvmem(disk->part0);
> >  	return 0;
> >  
> >  out_unregister_bdi:
> > @@ -569,6 +570,7 @@ static void blk_report_disk_dead(struct gendisk *disk)
> >  		if (bdev->bd_holder_ops && bdev->bd_holder_ops->mark_dead)
> >  			bdev->bd_holder_ops->mark_dead(bdev);
> >  		mutex_unlock(&bdev->bd_holder_lock);
> > +		blk_unregister_nvmem(bdev);
> >  
> >  		put_device(&bdev->bd_device);
> >  		rcu_read_lock();
> > diff --git a/block/partitions/core.c b/block/partitions/core.c
> > index 13a7341299a91..68bd655f5e68e 100644
> > --- a/block/partitions/core.c
> > +++ b/block/partitions/core.c
> > @@ -404,6 +404,8 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
> >  	/* suppress uevent if the disk suppresses it */
> >  	if (!dev_get_uevent_suppress(ddev))
> >  		kobject_uevent(&pdev->kobj, KOBJ_ADD);
> > +
> > +	blk_register_nvmem(bdev);
> >  	return bdev;
> >  
> >  out_del:
> 
> -- 
> Damien Le Moal
> Western Digital Research
> 



More information about the linux-mtd mailing list