[PATCH] um: Abandon the _PAGE_NEWPROT bit
Benjamin Berg
benjamin at sipsolutions.net
Fri Oct 11 00:39:24 PDT 2024
Hi,
On Fri, 2024-10-11 at 13:38 +0800, Tiwei Bie wrote:
> When a PTE is updated in the page table, the _PAGE_NEWPAGE bit will
> always be set. And the corresponding page will always be mapped or
> unmapped depending on whether the PTE is present or not. The check
> on the _PAGE_NEWPROT bit is not really reachable. Abandoning it will
> allow us to simplify the code and remove the unreachable code.
Oh, nice cleanup!
And I like that mprotect is gone as I don't want it in SECCOMP mode :-)
Maybe we should rename _PAGE_NEWPAGE to something like _PAGE_NEEDSYNC?
I think that would make it clearer how everything ties together.
Anyway, the change looks good to me.
Benjamin
Reviewed-by: Benjamin Berg <benjamin.berg at intel.com>
> Signed-off-by: Tiwei Bie <tiwei.btw at antgroup.com>
> ---
> arch/um/include/asm/pgtable.h | 40 ++++-----------
> arch/um/include/shared/os.h | 2 -
> arch/um/include/shared/skas/stub-data.h | 1 -
> arch/um/kernel/skas/stub.c | 10 ----
> arch/um/kernel/tlb.c | 66 +++++++++++--------------
> arch/um/os-Linux/skas/mem.c | 21 --------
> 6 files changed, 37 insertions(+), 103 deletions(-)
>
> diff --git a/arch/um/include/asm/pgtable.h
> b/arch/um/include/asm/pgtable.h
> index bd7a9593705f..a32424cfe792 100644
> --- a/arch/um/include/asm/pgtable.h
> +++ b/arch/um/include/asm/pgtable.h
> @@ -12,7 +12,6 @@
>
> #define _PAGE_PRESENT 0x001
> #define _PAGE_NEWPAGE 0x002
> -#define _PAGE_NEWPROT 0x004
> #define _PAGE_RW 0x020
> #define _PAGE_USER 0x040
> #define _PAGE_ACCESSED 0x080
> @@ -151,23 +150,12 @@ static inline int pte_newpage(pte_t pte)
> return pte_get_bits(pte, _PAGE_NEWPAGE);
> }
>
> -static inline int pte_newprot(pte_t pte)
> -{
> - return(pte_present(pte) && (pte_get_bits(pte,
> _PAGE_NEWPROT)));
> -}
> -
> /*
> * =================================
> * Flags setting section.
> * =================================
> */
>
> -static inline pte_t pte_mknewprot(pte_t pte)
> -{
> - pte_set_bits(pte, _PAGE_NEWPROT);
> - return(pte);
> -}
> -
> static inline pte_t pte_mkclean(pte_t pte)
> {
> pte_clear_bits(pte, _PAGE_DIRTY);
> @@ -184,17 +172,14 @@ static inline pte_t pte_wrprotect(pte_t pte)
> {
> if (likely(pte_get_bits(pte, _PAGE_RW)))
> pte_clear_bits(pte, _PAGE_RW);
> - else
> - return pte;
> - return(pte_mknewprot(pte));
> + return pte;
> }
>
> static inline pte_t pte_mkread(pte_t pte)
> {
> - if (unlikely(pte_get_bits(pte, _PAGE_USER)))
> - return pte;
> - pte_set_bits(pte, _PAGE_USER);
> - return(pte_mknewprot(pte));
> + if (likely(!pte_get_bits(pte, _PAGE_USER)))
> + pte_set_bits(pte, _PAGE_USER);
> + return pte;
> }
>
> static inline pte_t pte_mkdirty(pte_t pte)
> @@ -211,18 +196,15 @@ static inline pte_t pte_mkyoung(pte_t pte)
>
> static inline pte_t pte_mkwrite_novma(pte_t pte)
> {
> - if (unlikely(pte_get_bits(pte, _PAGE_RW)))
> - return pte;
> - pte_set_bits(pte, _PAGE_RW);
> - return(pte_mknewprot(pte));
> + if (likely(!pte_get_bits(pte, _PAGE_RW)))
> + pte_set_bits(pte, _PAGE_RW);
> + return pte;
> }
>
> static inline pte_t pte_mkuptodate(pte_t pte)
> {
> pte_clear_bits(pte, _PAGE_NEWPAGE);
> - if(pte_present(pte))
> - pte_clear_bits(pte, _PAGE_NEWPROT);
> - return(pte);
> + return pte;
> }
>
> static inline pte_t pte_mknewpage(pte_t pte)
> @@ -236,12 +218,10 @@ static inline void set_pte(pte_t *pteptr, pte_t
> pteval)
> pte_copy(*pteptr, pteval);
>
> /* If it's a swap entry, it needs to be marked _PAGE_NEWPAGE
> so
> - * fix_range knows to unmap it. _PAGE_NEWPROT is specific
> to
> - * mapped pages.
> + * update_pte_range knows to unmap it.
> */
>
> *pteptr = pte_mknewpage(*pteptr);
> - if(pte_present(*pteptr)) *pteptr = pte_mknewprot(*pteptr);
> }
>
> #define PFN_PTE_SHIFT PAGE_SHIFT
> @@ -298,8 +278,6 @@ static inline int pte_same(pte_t pte_a, pte_t
> pte_b)
> ({ pte_t pte; \
> \
> pte_set_val(pte, page_to_phys(page), (pgprot)); \
> - if (pte_present(pte)) \
> - pte_mknewprot(pte_mknewpage(pte)); \
> pte;})
>
> static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
> diff --git a/arch/um/include/shared/os.h
> b/arch/um/include/shared/os.h
> index bf539fee7831..09f8201de5db 100644
> --- a/arch/um/include/shared/os.h
> +++ b/arch/um/include/shared/os.h
> @@ -279,8 +279,6 @@ int map(struct mm_id *mm_idp, unsigned long virt,
> unsigned long len, int prot, int phys_fd,
> unsigned long long offset);
> int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long
> len);
> -int protect(struct mm_id *mm_idp, unsigned long addr,
> - unsigned long len, unsigned int prot);
>
> /* skas/process.c */
> extern int is_skas_winch(int pid, int fd, void *data);
> diff --git a/arch/um/include/shared/skas/stub-data.h
> b/arch/um/include/shared/skas/stub-data.h
> index 3fbdda727373..81a4cace032c 100644
> --- a/arch/um/include/shared/skas/stub-data.h
> +++ b/arch/um/include/shared/skas/stub-data.h
> @@ -30,7 +30,6 @@ enum stub_syscall_type {
> STUB_SYSCALL_UNSET = 0,
> STUB_SYSCALL_MMAP,
> STUB_SYSCALL_MUNMAP,
> - STUB_SYSCALL_MPROTECT,
> };
>
> struct stub_syscall {
> diff --git a/arch/um/kernel/skas/stub.c b/arch/um/kernel/skas/stub.c
> index 5d52ffa682dc..796fc266d3bb 100644
> --- a/arch/um/kernel/skas/stub.c
> +++ b/arch/um/kernel/skas/stub.c
> @@ -35,16 +35,6 @@ static __always_inline int syscall_handler(struct
> stub_data *d)
> return -1;
> }
> break;
> - case STUB_SYSCALL_MPROTECT:
> - res = stub_syscall3(__NR_mprotect,
> - sc->mem.addr, sc->mem.length,
> - sc->mem.prot);
> - if (res) {
> - d->err = res;
> - d->syscall_data_len = i;
> - return -1;
> - }
> - break;
> default:
> d->err = -95; /* EOPNOTSUPP */
> d->syscall_data_len = i;
> diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
> index 548af31d4111..23c1f550cd7c 100644
> --- a/arch/um/kernel/tlb.c
> +++ b/arch/um/kernel/tlb.c
> @@ -23,9 +23,6 @@ struct vm_ops {
> int phys_fd, unsigned long long offset);
> int (*unmap)(struct mm_id *mm_idp,
> unsigned long virt, unsigned long len);
> - int (*mprotect)(struct mm_id *mm_idp,
> - unsigned long virt, unsigned long len,
> - unsigned int prot);
> };
>
> static int kern_map(struct mm_id *mm_idp,
> @@ -44,15 +41,6 @@ static int kern_unmap(struct mm_id *mm_idp,
> return os_unmap_memory((void *)virt, len);
> }
>
> -static int kern_mprotect(struct mm_id *mm_idp,
> - unsigned long virt, unsigned long len,
> - unsigned int prot)
> -{
> - return os_protect_memory((void *)virt, len,
> - prot & UM_PROT_READ, prot &
> UM_PROT_WRITE,
> - 1);
> -}
> -
> void report_enomem(void)
> {
> printk(KERN_ERR "UML ran out of memory on the host side! "
> @@ -65,33 +53,37 @@ static inline int update_pte_range(pmd_t *pmd,
> unsigned long addr,
> struct vm_ops *ops)
> {
> pte_t *pte;
> - int r, w, x, prot, ret = 0;
> + int ret = 0;
>
> pte = pte_offset_kernel(pmd, addr);
> do {
> - r = pte_read(*pte);
> - w = pte_write(*pte);
> - x = pte_exec(*pte);
> - if (!pte_young(*pte)) {
> - r = 0;
> - w = 0;
> - } else if (!pte_dirty(*pte))
> - w = 0;
> -
> - prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE
> : 0) |
> - (x ? UM_PROT_EXEC : 0));
> - if (pte_newpage(*pte)) {
> - if (pte_present(*pte)) {
> - __u64 offset;
> - unsigned long phys = pte_val(*pte) &
> PAGE_MASK;
> - int fd = phys_mapping(phys,
> &offset);
> -
> - ret = ops->mmap(ops->mm_idp, addr,
> PAGE_SIZE,
> - prot, fd, offset);
> - } else
> - ret = ops->unmap(ops->mm_idp, addr,
> PAGE_SIZE);
> - } else if (pte_newprot(*pte))
> - ret = ops->mprotect(ops->mm_idp, addr,
> PAGE_SIZE, prot);
> + if (!pte_newpage(*pte))
> + continue;
> +
> + if (pte_present(*pte)) {
> + __u64 offset;
> + unsigned long phys = pte_val(*pte) &
> PAGE_MASK;
> + int fd = phys_mapping(phys, &offset);
> + int r, w, x, prot;
> +
> + r = pte_read(*pte);
> + w = pte_write(*pte);
> + x = pte_exec(*pte);
> + if (!pte_young(*pte)) {
> + r = 0;
> + w = 0;
> + } else if (!pte_dirty(*pte))
> + w = 0;
> +
> + prot = (r ? UM_PROT_READ : 0) |
> + (w ? UM_PROT_WRITE : 0) |
> + (x ? UM_PROT_EXEC : 0);
> +
> + ret = ops->mmap(ops->mm_idp, addr,
> PAGE_SIZE,
> + prot, fd, offset);
> + } else
> + ret = ops->unmap(ops->mm_idp, addr,
> PAGE_SIZE);
> +
> *pte = pte_mkuptodate(*pte);
> } while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
> return ret;
> @@ -180,11 +172,9 @@ int um_tlb_sync(struct mm_struct *mm)
> if (mm == &init_mm) {
> ops.mmap = kern_map;
> ops.unmap = kern_unmap;
> - ops.mprotect = kern_mprotect;
> } else {
> ops.mmap = map;
> ops.unmap = unmap;
> - ops.mprotect = protect;
> }
>
> pgd = pgd_offset(mm, addr);
> diff --git a/arch/um/os-Linux/skas/mem.c
> b/arch/um/os-Linux/skas/mem.c
> index 9a13ac23c606..d7f1814b0e5a 100644
> --- a/arch/um/os-Linux/skas/mem.c
> +++ b/arch/um/os-Linux/skas/mem.c
> @@ -217,24 +217,3 @@ int unmap(struct mm_id *mm_idp, unsigned long
> addr, unsigned long len)
>
> return 0;
> }
> -
> -int protect(struct mm_id *mm_idp, unsigned long addr, unsigned long
> len,
> - unsigned int prot)
> -{
> - struct stub_syscall *sc;
> -
> - /* Compress with previous syscall if that is possible */
> - sc = syscall_stub_get_previous(mm_idp,
> STUB_SYSCALL_MPROTECT, addr);
> - if (sc && sc->mem.prot == prot) {
> - sc->mem.length += len;
> - return 0;
> - }
> -
> - sc = syscall_stub_alloc(mm_idp);
> - sc->syscall = STUB_SYSCALL_MPROTECT;
> - sc->mem.addr = addr;
> - sc->mem.length = len;
> - sc->mem.prot = prot;
> -
> - return 0;
> -}
More information about the linux-um
mailing list