[PATCH v2 03/34] s390: Use pt_frag_refcount for pagetables
Vishal Moola (Oracle)
vishal.moola at gmail.com
Mon May 1 12:27:58 PDT 2023
s390 currently uses _refcount to identify fragmented page tables.
The page table struct already has a member pt_frag_refcount used by
powerpc, so have s390 use that instead of the _refcount field as well.
This improves the safety for _refcount and the page table tracking.
This also allows us to simplify the tracking since we can once again use
the lower byte of pt_frag_refcount instead of the upper byte of _refcount.
Signed-off-by: Vishal Moola (Oracle) <vishal.moola at gmail.com>
---
arch/s390/mm/pgalloc.c | 38 +++++++++++++++-----------------------
1 file changed, 15 insertions(+), 23 deletions(-)
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 66ab68db9842..6b99932abc66 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -182,20 +182,17 @@ void page_table_free_pgste(struct page *page)
* As follows from the above, no unallocated or fully allocated parent
* pages are contained in mm_context_t::pgtable_list.
*
- * The upper byte (bits 24-31) of the parent page _refcount is used
+ * The lower byte (bits 0-7) of the parent page pt_frag_refcount is used
* for tracking contained 2KB-pgtables and has the following format:
*
* PP AA
- * 01234567 upper byte (bits 24-31) of struct page::_refcount
+ * 01234567 upper byte (bits 0-7) of struct page::pt_frag_refcount
* || ||
* || |+--- upper 2KB-pgtable is allocated
* || +---- lower 2KB-pgtable is allocated
* |+------- upper 2KB-pgtable is pending for removal
* +-------- lower 2KB-pgtable is pending for removal
*
- * (See commit 620b4e903179 ("s390: use _refcount for pgtables") on why
- * using _refcount is possible).
- *
* When 2KB-pgtable is allocated the corresponding AA bit is set to 1.
* The parent page is either:
* - added to mm_context_t::pgtable_list in case the second half of the
@@ -243,11 +240,12 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
if (!list_empty(&mm->context.pgtable_list)) {
page = list_first_entry(&mm->context.pgtable_list,
struct page, lru);
- mask = atomic_read(&page->_refcount) >> 24;
+ mask = atomic_read(&page->pt_frag_refcount);
/*
* The pending removal bits must also be checked.
* Failure to do so might lead to an impossible
- * value of (i.e 0x13 or 0x23) written to _refcount.
+ * value of (i.e 0x13 or 0x23) written to
+ * pt_frag_refcount.
* Such values violate the assumption that pending and
* allocation bits are mutually exclusive, and the rest
* of the code unrails as result. That could lead to
@@ -259,8 +257,8 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
bit = mask & 1; /* =1 -> second 2K */
if (bit)
table += PTRS_PER_PTE;
- atomic_xor_bits(&page->_refcount,
- 0x01U << (bit + 24));
+ atomic_xor_bits(&page->pt_frag_refcount,
+ 0x01U << bit);
list_del(&page->lru);
}
}
@@ -281,12 +279,12 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
table = (unsigned long *) page_to_virt(page);
if (mm_alloc_pgste(mm)) {
/* Return 4K page table with PGSTEs */
- atomic_xor_bits(&page->_refcount, 0x03U << 24);
+ atomic_xor_bits(&page->pt_frag_refcount, 0x03U);
memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
} else {
/* Return the first 2K fragment of the page */
- atomic_xor_bits(&page->_refcount, 0x01U << 24);
+ atomic_xor_bits(&page->pt_frag_refcount, 0x01U);
memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE);
spin_lock_bh(&mm->context.lock);
list_add(&page->lru, &mm->context.pgtable_list);
@@ -323,22 +321,19 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
* will happen outside of the critical section from this
* function or from __tlb_remove_table()
*/
- mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
- mask >>= 24;
+ mask = atomic_xor_bits(&page->pt_frag_refcount, 0x11U << bit);
if (mask & 0x03U)
list_add(&page->lru, &mm->context.pgtable_list);
else
list_del(&page->lru);
spin_unlock_bh(&mm->context.lock);
- mask = atomic_xor_bits(&page->_refcount, 0x10U << (bit + 24));
- mask >>= 24;
+ mask = atomic_xor_bits(&page->pt_frag_refcount, 0x10U << bit);
if (mask != 0x00U)
return;
half = 0x01U << bit;
} else {
half = 0x03U;
- mask = atomic_xor_bits(&page->_refcount, 0x03U << 24);
- mask >>= 24;
+ mask = atomic_xor_bits(&page->pt_frag_refcount, 0x03U);
}
page_table_release_check(page, table, half, mask);
@@ -368,8 +363,7 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
* outside of the critical section from __tlb_remove_table() or from
* page_table_free()
*/
- mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
- mask >>= 24;
+ mask = atomic_xor_bits(&page->pt_frag_refcount, 0x11U << bit);
if (mask & 0x03U)
list_add_tail(&page->lru, &mm->context.pgtable_list);
else
@@ -391,14 +385,12 @@ void __tlb_remove_table(void *_table)
return;
case 0x01U: /* lower 2K of a 4K page table */
case 0x02U: /* higher 2K of a 4K page table */
- mask = atomic_xor_bits(&page->_refcount, mask << (4 + 24));
- mask >>= 24;
+ mask = atomic_xor_bits(&page->pt_frag_refcount, mask << 4);
if (mask != 0x00U)
return;
break;
case 0x03U: /* 4K page table with pgstes */
- mask = atomic_xor_bits(&page->_refcount, 0x03U << 24);
- mask >>= 24;
+ mask = atomic_xor_bits(&page->pt_frag_refcount, 0x03U);
break;
}
--
2.39.2
More information about the linux-riscv
mailing list