[RFC PATCH 2/4] nvdimm/pmem: Flush to memory before machine restart

Mathieu Desnoyers mathieu.desnoyers at efficios.com
Tue Jun 18 08:41:55 PDT 2024


Register pre-restart notifiers to flush pmem areas from CPU data cache
to memory on reboot, immediately before restarting the machine. This
ensures all other CPUs are quiescent before the pmem data is flushed to
memory.

I did an earlier POC that flushed caches on panic/die oops notifiers [1],
but it did not cover the reboot case. I've been made aware that some
distribution vendors have started shipping their own modified version of
my earlier POC patch. This makes a strong argument for upstreaming this
work.

Use the newly introduced "pre-restart" notifiers to flush pmem data to
memory immediately before machine restart.

Delta from my POC patch [1]:

Looking at the panic() code, it invokes emergency_restart() to restart
the machine, which uses the new pre-restart notifiers. There is
therefore no need to hook into panic handlers explicitly.

Looking at the die notifiers, those don't actually end up triggering
a machine restart, so it does not appear to be relevant to flush pmem
to memory there. I must admit I originally looked at how ftrace hooked
into panic/die-oops handlers for its ring buffers, but the use-case is
different here: we only want to cover machine restart use-cases.

Link: https://lore.kernel.org/linux-kernel/f6067e3e-a2bc-483d-b214-6e3fe6691279@efficios.com/ [1]
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers at efficios.com>
Cc: Dan Williams <dan.j.williams at intel.com>
Cc: Vishal Verma <vishal.l.verma at intel.com>
Cc: Dave Jiang <dave.jiang at intel.com>
Cc: Ira Weiny <ira.weiny at intel.com>
Cc: Steven Rostedt <rostedt at goodmis.org>
Cc: nvdimm at lists.linux.dev
Cc: Thomas Gleixner <tglx at linutronix.de>
Cc: Ingo Molnar <mingo at redhat.com>
Cc: Borislav Petkov <bp at alien8.de>
Cc: Dave Hansen <dave.hansen at linux.intel.com>
Cc: x86 at kernel.org
Cc: "H. Peter Anvin" <hpa at zytor.com>
Cc: Catalin Marinas <catalin.marinas at arm.com>
Cc: Will Deacon <will at kernel.org>
Cc: linux-arm-kernel at lists.infradead.org
---
 drivers/nvdimm/pmem.c | 29 ++++++++++++++++++++++++++++-
 drivers/nvdimm/pmem.h |  2 ++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 598fe2e89bda..bf1d187a9dca 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -26,12 +26,16 @@
 #include <linux/dax.h>
 #include <linux/nd.h>
 #include <linux/mm.h>
+#include <linux/reboot.h>
 #include <asm/cacheflush.h>
 #include "pmem.h"
 #include "btt.h"
 #include "pfn.h"
 #include "nd.h"
 
+static int pmem_pre_restart_handler(struct notifier_block *self,
+		unsigned long ev, void *unused);
+
 static struct device *to_dev(struct pmem_device *pmem)
 {
 	/*
@@ -423,6 +427,7 @@ static void pmem_release_disk(void *__pmem)
 {
 	struct pmem_device *pmem = __pmem;
 
+	unregister_pre_restart_notifier(&pmem->pre_restart_notifier);
 	dax_remove_host(pmem->disk);
 	kill_dax(pmem->dax_dev);
 	put_dax(pmem->dax_dev);
@@ -575,9 +580,14 @@ static int pmem_attach_disk(struct device *dev,
 			goto out_cleanup_dax;
 		dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
 	}
-	rc = device_add_disk(dev, disk, pmem_attribute_groups);
+	pmem->pre_restart_notifier.notifier_call = pmem_pre_restart_handler;
+	pmem->pre_restart_notifier.priority = 0;
+	rc = register_pre_restart_notifier(&pmem->pre_restart_notifier);
 	if (rc)
 		goto out_remove_host;
+	rc = device_add_disk(dev, disk, pmem_attribute_groups);
+	if (rc)
+		goto out_unregister_pre_restart;
 	if (devm_add_action_or_reset(dev, pmem_release_disk, pmem))
 		return -ENOMEM;
 
@@ -589,6 +599,8 @@ static int pmem_attach_disk(struct device *dev,
 		dev_warn(dev, "'badblocks' notification disabled\n");
 	return 0;
 
+out_unregister_pre_restart:
+	unregister_pre_restart_notifier(&pmem->pre_restart_notifier);
 out_remove_host:
 	dax_remove_host(pmem->disk);
 out_cleanup_dax:
@@ -751,6 +763,21 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
 	}
 }
 
+/*
+ * Pre-restart notifier callback: flush this pmem device's area (virt_addr,
+ * size) from the CPU data cache to memory immediately before machine restart,
+ * so that content stored without explicit cache flushing (volatile memory
+ * use-cases) can be preserved across a warm reboot. Always returns NOTIFY_DONE.
+ */
+static int pmem_pre_restart_handler(struct notifier_block *self,
+		unsigned long ev, void *unused)
+{
+	struct pmem_device *pmem = container_of(self, struct pmem_device, pre_restart_notifier);
+
+	arch_wb_cache_pmem(pmem->virt_addr, pmem->size);
+	return NOTIFY_DONE;
+}
+
 MODULE_ALIAS("pmem");
 MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO);
 MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM);
diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h
index 392b0b38acb9..b8a2a518cf82 100644
--- a/drivers/nvdimm/pmem.h
+++ b/drivers/nvdimm/pmem.h
@@ -4,6 +4,7 @@
 #include <linux/page-flags.h>
 #include <linux/badblocks.h>
 #include <linux/memremap.h>
+#include <linux/notifier.h>
 #include <linux/types.h>
 #include <linux/pfn_t.h>
 #include <linux/fs.h>
@@ -27,6 +28,7 @@ struct pmem_device {
 	struct dax_device	*dax_dev;
 	struct gendisk		*disk;
 	struct dev_pagemap	pgmap;
+	struct notifier_block	pre_restart_notifier;
 };
 
 long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
-- 
2.39.2




More information about the linux-arm-kernel mailing list