[PATCH v2] Improve the performance of --num-threads -d 31

Minfei Huang mhuang at redhat.com
Thu Mar 3 20:17:56 PST 2016


Hi, Minoru.

Sure. I will test this patch.

Thanks
Minfei

On 03/04/16 at 12:59am, Minoru Usui wrote:
> Hi, Zhou, Minfei
> 
> > -----Original Message-----
> > From: kexec [mailto:kexec-bounces at lists.infradead.org] On Behalf Of Minfei Huang
> > Sent: Wednesday, February 24, 2016 11:25 AM
> > To: "Zhou, Wenjian/周文剑" <zhouwj-fnst at cn.fujitsu.com>
> > Cc: kexec at lists.infradead.org
> > Subject: Re: [PATCH v2] Improve the performance of --num-threads -d 31
> > 
> > 
> > > On Feb 24, 2016, at 10:20, Zhou, Wenjian/周文剑 <zhouwj-fnst at cn.fujitsu.com> wrote:
> > >
> > > Hi,
> > >
> > > On 02/24/2016 09:43 AM, Minfei Huang wrote:
> > >> On 02/23/16 at 01:47pm, "Zhou, Wenjian/周文剑" wrote:
> > >>> Hello, Minfei,
> > >>>
> > >>> Does it occur every time?
> > >>> If not, I think I know the reason.
> > >>
> > >> Hi, Wenjian.
> > >>
> > >> This patch is applied directly on top of version 1.5.9, and makedumpfile
> > >> hangs if the --num-threads option is specified.
> > >>
> > >
> > > I see.
> > > I'm working on it.
> > >
> > > BTW, did you only test it with --num-threads 128?
> > > How is it with --num-threads 32(or smaller value)?
> > 
> > Yes. I have tested with --num-threads 1, 2, 8 and 32. Except for --num-threads 1,
> > all of the tests fail.
> 
> I wrote an RFC patch which fixes this problem.
> This patch is an incremental one on top of Zhou's original v2 patch.
> 
> I think the problem was caused by a race on page_flag_buf between the consumer and the producers.
> So I added a mutex to each page_flag_buf.
> The change summary is as follows.
> 
>   * Add a mutex to each page_flag_buf.
>   * Producer:
>       - Hold the page_flag_buf mutex until the producer finishes writing all data to page_flag_buf.
>       - Fix the issue that info->current_pfn becomes larger than end_pfn.
>       - Hold info->current_pfn_mutex until the producer gets a dumpable pfn.
>   * Consumer:
>       - Hold the page_flag_buf mutex until the consumer finishes writing all data to page_flag_buf.
> 
> The results of my test with a 5GB dumpfile are as follows.
> 
> ===
> Common Option: -c --cyclic-buffer=10 --num-threads
> 
> [-d0]
> num-thread    real     vs num-threads 0
> ----------------------------------------
>   0          209.621      100.0%
>   1          217.921      104.0%
>   2           84.539       40.3%
>   4           51.787       24.7%
>   8           37.260       17.8%
> 
> [-d31]
> num-thread    real     vs num-threads 0
> ----------------------------------------
>   0           11.492      100.0%
>   1            8.572       74.6%
>   2            4.928       42.9%
>   4            3.115       27.1%
>   8            2.259       19.7% 
> ===
> 
> How about this approach?
> Could you test this?
> 
> ===
> diff --git a/makedumpfile.c b/makedumpfile.c
> index 8a0c636..c697f93 100644
> --- a/makedumpfile.c
> +++ b/makedumpfile.c
> @@ -7521,7 +7521,7 @@ kdump_thread_function_cyclic(void *arg) {
>  	 * unfiltered zero page will only take a page_flag_buf
>  	 * unfiltered non-zero page will take a page_flag_buf and a page_data_buf
>  	 */
> -	while (pfn < kdump_thread_args->end_pfn) {
> +	while (1) {
>  		buf_ready = FALSE;
>  
>  		while (page_data_buf[index].used != FALSE ||
> @@ -7532,35 +7532,46 @@ kdump_thread_function_cyclic(void *arg) {
>  
>  		while (buf_ready == FALSE) {
>  			pthread_testcancel();
> -			if (page_flag_buf->ready == FLAG_READY)
> +			pthread_mutex_lock(&page_flag_buf->mutex);
> +			if (page_flag_buf->ready == FLAG_READY) {
> +				pthread_mutex_unlock(&page_flag_buf->mutex);
>  				continue;
> +			}
>  
> -			/* get next pfn */
> +			/* get next dumpable pfn */
>  			pthread_mutex_lock(&info->current_pfn_mutex);
>  			pfn = info->current_pfn;
> -			info->current_pfn++;
> -			page_flag_buf->ready = FLAG_FILLING;
> -			pthread_mutex_unlock(&info->current_pfn_mutex);
> -
> -			page_flag_buf->pfn = pfn;
> +			while(1) {
> +				if (pfn >= kdump_thread_args->end_pfn) {
> +					pthread_mutex_unlock(&info->current_pfn_mutex);
> +					page_flag_buf->pfn = pfn;
> +					page_flag_buf->ready = FLAG_READY;
> +					pthread_mutex_unlock(&page_flag_buf->mutex);
> +					page_data_buf[index].used = FALSE;
> +					pthread_mutex_unlock(&page_data_buf[index].mutex);
> +					goto finish;
> +				}
> +				dumpable = is_dumpable(
> +					info->fd_bitmap ? &bitmap_parallel : info->bitmap2,
> +					pfn, cycle);
> +				if (dumpable)
> +					break;
>  
> -			if (pfn >= kdump_thread_args->end_pfn) {
> -				page_data_buf[index].used = FALSE;
> -				page_flag_buf->ready = FLAG_READY;
> -				break;
> +				pfn++;
>  			}
> -
> -			dumpable = is_dumpable(
> -				info->fd_bitmap ? &bitmap_parallel : info->bitmap2,
> -				pfn,
> -				cycle);
> -			if (!dumpable)
> -				continue;
> +			info->current_pfn = pfn + 1;
> +			pthread_mutex_unlock(&info->current_pfn_mutex);
> +			page_flag_buf->pfn = pfn;
> +			page_flag_buf->ready = FLAG_FILLING;
>  
>  			if (!read_pfn_parallel(fd_memory, pfn, buf,
>  					       &bitmap_memory_parallel,
> -					       mmap_cache))
> +					       mmap_cache)) {
> +					pthread_mutex_unlock(&page_flag_buf->mutex);
> +					page_data_buf[index].used = FALSE;
> +					pthread_mutex_unlock(&page_data_buf[index].mutex);
>  					goto fail;
> +			}
>  
>  			filter_data_buffer_parallel(buf, pfn_to_paddr(pfn),
>  							info->page_size,
> @@ -7626,12 +7637,12 @@ kdump_thread_function_cyclic(void *arg) {
>  			buf_ready = TRUE;
>  next:
>  			page_flag_buf->ready = FLAG_READY;
> +			pthread_mutex_unlock(&page_flag_buf->mutex);
>  			page_flag_buf = page_flag_buf->next;
> -
>  		}
> -
>  		pthread_mutex_unlock(&page_data_buf[index].mutex);
>  	}
> +finish:
>  	retval = NULL;
>  
>  fail:
> @@ -7658,15 +7669,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>  	mdf_pfn_t start_pfn, end_pfn;
>  	struct page_desc pd;
>  	struct timeval tv_start;
> -	struct timeval last, new;
>  	pthread_t **threads = NULL;
>  	struct thread_args *kdump_thread_args = NULL;
>  	void *thread_result;
>  	int page_buf_num;
>  	struct page_data *page_data_buf = NULL;
> +	struct page_flag *page_flag_buf = NULL;
>  	int i;
>  	int index;
> -	int end_count, consuming, check_count;
> +	int end_count, consuming, check_count, prevlocked;
>  	mdf_pfn_t current_pfn, temp_pfn;
>  
>  	if (info->flag_elf_dumpfile)
> @@ -7728,6 +7739,19 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>  	}
>  
>  	for (i = 0; i < info->num_threads; i++) {
> +		for (page_flag_buf = info->page_flag_buf[i];
> +		     page_flag_buf->next != info->page_flag_buf[i];
> +		     page_flag_buf = page_flag_buf->next) {
> +			page_flag_buf->ready = FLAG_UNUSED;
> +			page_flag_buf->pfn   = start_pfn;
> +			res = pthread_mutex_init(&page_flag_buf->mutex, NULL);
> +			if (res != 0) {
> +				ERRMSG("Can't initialize mutex of page_flag_buf. %s\n",
> +						strerror(res));
> +				goto out;
> +			}
> +		}
> +
>  		kdump_thread_args[i].thread_num = i;
>  		kdump_thread_args[i].len_buf_out = len_buf_out;
>  		kdump_thread_args[i].start_pfn = start_pfn;
> @@ -7749,9 +7773,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>  
>  	end_count = 0;
>  	while (1) {
> -		consuming = 0;
> -		check_count = 0;
> -
>  		/*
>  		 * The basic idea is producer producing page and consumer writing page.
>  		 * Each producer have a page_flag_buf list which is used for storing page's description.
> @@ -7760,8 +7781,9 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>  		 * The main thread is the consumer. It will find the next pfn and write it into file.
>  		 * The next pfn is smallest pfn in all page_flag_buf.
>  		 */
> -		gettimeofday(&last, NULL);
> -		while (1) {
> +		do {
> +			consuming = prevlocked = -1;
> +			check_count = 0;
>  			current_pfn = end_pfn;
>  
>  			/*
> @@ -7772,32 +7794,36 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>  			 * current_pfn is used for recording the value of pfn when checking the pfn.
>  			 */
>  			for (i = 0; i < info->num_threads; i++) {
> +				pthread_mutex_lock(&info->page_flag_buf[i]->mutex);
>  				if (info->page_flag_buf[i]->ready == FLAG_UNUSED)
> -					continue;
> +					goto next;
>  				temp_pfn = info->page_flag_buf[i]->pfn;
>  
>  				/*
>  				 * count how many threads have reached the end.
>  				 */
>  				if (temp_pfn >= end_pfn) {
> -					/*
> -					 * prevent setting FLAG_UNUSED being optimized.
> -					 */
> -					MSG("-");
> -
>  					info->page_flag_buf[i]->ready = FLAG_UNUSED;
> -
> -					info->current_pfn = end_pfn;
>  					end_count++;
> -					continue;
> +					goto next;
>  				}
>  
>  				if (current_pfn < temp_pfn)
> -					continue;
> +					goto next;
>  
>  				check_count++;
> +				prevlocked = consuming;
>  				consuming = i;
>  				current_pfn = temp_pfn;
> +next:
> +				/*
> +				 * Keep mutex of page_flag which has minimam pfn
> +				 */
> +				if (consuming != i) {            // unlock page_flag which is not minimum pfn
> +					pthread_mutex_unlock(&info->page_flag_buf[i]->mutex);
> +				} else if (prevlocked != -1) {   // unlock page_flag which previously locked
> +					pthread_mutex_unlock(&info->page_flag_buf[prevlocked]->mutex);
> +				}
>  			}
>  
>  			/*
> @@ -7805,40 +7831,18 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>  			 */
>  			if (end_count >= info->num_threads)
>  				goto finish;
> -
> -			/*
> -			 * Since it has the probabilty that there is no page_flag_buf being ready,
> -			 * we should recheck if it happens.
> -			 */
> -			if (check_count == 0)
> -				continue;
> -
> -			/*
> -			 * If the page_flag_buf is not ready, the pfn recorded may be changed.
> -			 * So we should recheck.
> -			 */
> -			if (info->page_flag_buf[consuming]->ready != FLAG_READY) {
> -				gettimeofday(&new, NULL);
> -				if (new.tv_sec - last.tv_sec > WAIT_TIME) {
> -					ERRMSG("Can't get data of pfn.\n");
> -					goto out;
> -				}
> -				continue;
> -			}
> -
> -			if (current_pfn == info->page_flag_buf[consuming]->pfn)
> -				break;
> -		}
> +		} while (check_count == 0);
>  
>  		if ((num_dumped % per) == 0)
>  			print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable);
>  
>  		num_dumped++;
>  
> -
>  		if (info->page_flag_buf[consuming]->zero == TRUE) {
> -			if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t)))
> +			if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) {
> +				pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex);
>  				goto out;
> +			}
>  			pfn_zero++;
>  		} else {
>  			index = info->page_flag_buf[consuming]->index;
> @@ -7850,16 +7854,21 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>  			/*
>  			 * Write the page header.
>  			 */
> -			if (!write_cache(cd_header, &pd, sizeof(page_desc_t)))
> +			if (!write_cache(cd_header, &pd, sizeof(page_desc_t))) {
> +				pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex);
>  				goto out;
> +			}
>  			/*
>  			 * Write the page data.
>  			 */
> -			if (!write_cache(cd_page, page_data_buf[index].buf, pd.size))
> +			if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) {
> +				pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex);
>  				goto out;
> +			}
>  			page_data_buf[index].used = FALSE;
>  		}
>  		info->page_flag_buf[consuming]->ready = FLAG_UNUSED;
> +		pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex);
>  		info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next;
>  	}
>  finish:
> @@ -7906,6 +7915,14 @@ out:
>  		}
>  	}
>  
> +	for (i = 0; i < info->num_threads; i++) {
> +		for (page_flag_buf = info->page_flag_buf[i];
> +		     page_flag_buf->next != info->page_flag_buf[i];
> +		     page_flag_buf = page_flag_buf->next) {
> +			pthread_mutex_destroy(&page_flag_buf->mutex);
> +		}
> +	}
> +
>  	pthread_rwlock_destroy(&info->usemmap_rwlock);
>  	pthread_mutex_destroy(&info->filter_mutex);
>  	pthread_mutex_destroy(&info->consumed_pfn_mutex);
> diff --git a/makedumpfile.h b/makedumpfile.h
> index 80ce23a..188ec17 100644
> --- a/makedumpfile.h
> +++ b/makedumpfile.h
> @@ -971,7 +971,6 @@ typedef unsigned long long int ulonglong;
>   */
>  
>  #define PAGE_DATA_NUM	(50)
> -#define WAIT_TIME	(60 * 10)
>  #define PTHREAD_FAIL	((void *)-2)
>  #define NUM_BUFFERS	(20)
>  
> @@ -987,6 +986,7 @@ enum {
>  	FLAG_FILLING
>  };
>  struct page_flag {
> +	pthread_mutex_t mutex;
>  	mdf_pfn_t pfn;
>  	char zero;
>  	char ready;
> ===
> 
> 
> Thanks
> Minoru Usui



More information about the kexec mailing list