[PATCH v2 01/14] mm: Batch-copy PTE ranges during fork()

Barry Song 21cnbao at gmail.com
Sun Nov 26 21:54:14 PST 2023


> +copy_present_ptes(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
> +		  pte_t *dst_pte, pte_t *src_pte,
> +		  unsigned long addr, unsigned long end,
> +		  int *rss, struct folio **prealloc)
>  {
>  	struct mm_struct *src_mm = src_vma->vm_mm;
>  	unsigned long vm_flags = src_vma->vm_flags;
>  	pte_t pte = ptep_get(src_pte);
>  	struct page *page;
>  	struct folio *folio;
> +	int nr = 1;
> +	bool anon;
> +	bool any_dirty = pte_dirty(pte);
> +	int i;
>  
>  	page = vm_normal_page(src_vma, addr, pte);
> -	if (page)
> +	if (page) {
>  		folio = page_folio(page);
> -	if (page && folio_test_anon(folio)) {
> -		/*
> -		 * If this page may have been pinned by the parent process,
> -		 * copy the page immediately for the child so that we'll always
> -		 * guarantee the pinned page won't be randomly replaced in the
> -		 * future.
> -		 */
> -		folio_get(folio);
> -		if (unlikely(page_try_dup_anon_rmap(page, false, src_vma))) {
> -			/* Page may be pinned, we have to copy. */
> -			folio_put(folio);
> -			return copy_present_page(dst_vma, src_vma, dst_pte, src_pte,
> -						 addr, rss, prealloc, page);
> +		anon = folio_test_anon(folio);
> +		nr = folio_nr_pages_cont_mapped(folio, page, src_pte, addr,
> +						end, pte, &any_dirty);

in case we have a large folio with 16 CONTPTE basepages, and userspace
do madvise(addr + 4KB * 5, DONTNEED);

thus, the 4th basepage of PTE becomes PTE_NONE and folio_nr_pages_cont_mapped()
will return 15. in this case, we should copy page0~page3 and page5~page15.

but the current code is copying page0~page14, right? unless we are immediatly
split_folio to basepages in zap_pte_range(), we will have problems?

> +
> +		for (i = 0; i < nr; i++, page++) {
> +			if (anon) {
> +				/*
> +				 * If this page may have been pinned by the
> +				 * parent process, copy the page immediately for
> +				 * the child so that we'll always guarantee the
> +				 * pinned page won't be randomly replaced in the
> +				 * future.
> +				 */
> +				if (unlikely(page_try_dup_anon_rmap(
> +						page, false, src_vma))) {
> +					if (i != 0)
> +						break;
> +					/* Page may be pinned, we have to copy. */
> +					return copy_present_page(
> +						dst_vma, src_vma, dst_pte,
> +						src_pte, addr, rss, prealloc,
> +						page);
> +				}
> +				rss[MM_ANONPAGES]++;
> +				VM_BUG_ON(PageAnonExclusive(page));
> +			} else {
> +				page_dup_file_rmap(page, false);
> +				rss[mm_counter_file(page)]++;
> +			}

Thanks
Barry




More information about the linux-arm-kernel mailing list