[PATCH v2] Improve the performance of --num-threads -d 31

Minoru Usui min-usui at ti.jp.nec.com
Thu Mar 3 16:59:22 PST 2016


Hi, Zhou, Minfei

> -----Original Message-----
> From: kexec [mailto:kexec-bounces at lists.infradead.org] On Behalf Of Minfei Huang
> Sent: Wednesday, February 24, 2016 11:25 AM
> To: "Zhou, Wenjian/周文剑" <zhouwj-fnst at cn.fujitsu.com>
> Cc: kexec at lists.infradead.org
> Subject: Re: [PATCH v2] Improve the performance of --num-threads -d 31
> 
> 
> > On Feb 24, 2016, at 10:20, Zhou, Wenjian/周文剑 <zhouwj-fnst at cn.fujitsu.com> wrote:
> >
> > Hi,
> >
> > On 02/24/2016 09:43 AM, Minfei Huang wrote:
> >> On 02/23/16 at 01:47pm, "Zhou, Wenjian/周文剑" wrote:
> >>> Hello, Minfei,
> >>>
> >>> Does it occur every time?
> >>> If not, I think I have known the reason.
> >>
> >> Hi, Wenjian.
> >>
> >> This patch is applied directly on version 1.5.9. And makedumpfile hangs
> >> if option num-thread is appended.
> >>
> >
> > I see.
> > I'm working on it.
> >
> > BTW, did you only test it with --num-threads 128?
> > How is it with --num-threads 32(or smaller value)?
> 
> Yes. I have tested with --num-threads 1, 2, 8 and 32. Except for --num-threads 1,
> all of the tests fail.

I wrote an RFC patch which fixes this problem.
This patch is an incremental one on top of Zhou's original v2 patch.

I think the problem was caused by a race on page_flag_buf between the consumer and the producers.
So, I added a mutex to each page_flag_buf.
The change summary is as follows.

  * Add a mutex to each page_flag_buf.
  * Producer:
      - Hold the page_flag_buf mutex until the producer finishes writing all data to page_flag_buf.
      - Fix the issue that info->current_pfn becomes larger than end_pfn.
      - Hold info->current_pfn_mutex until the producer gets a dumpable pfn.
  * Consumer:
      - Hold the page_flag_buf mutex until the consumer finishes writing all data to page_flag_buf.

The results of my test with a 5GB dumpfile are as follows.

===
Common Option: -c --cyclic-buffer=10 --num-threads

[-d0]
num-thread    real     vs num-threads 0
----------------------------------------
  0          209.621      100.0%
  1          217.921      104.0%
  2           84.539       40.3%
  4           51.787       24.7%
  8           37.260       17.8%

[-d31]
num-thread    real     vs num-threads 0
----------------------------------------
  0           11.492      100.0%
  1            8.572       74.6%
  2            4.928       42.9%
  4            3.115       27.1%
  8            2.259       19.7% 
===

How about this approach?
Could you test this?

===
diff --git a/makedumpfile.c b/makedumpfile.c
index 8a0c636..c697f93 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -7521,7 +7521,7 @@ kdump_thread_function_cyclic(void *arg) {
 	 * unfiltered zero page will only take a page_flag_buf
 	 * unfiltered non-zero page will take a page_flag_buf and a page_data_buf
 	 */
-	while (pfn < kdump_thread_args->end_pfn) {
+	while (1) {
 		buf_ready = FALSE;
 
 		while (page_data_buf[index].used != FALSE ||
@@ -7532,35 +7532,46 @@ kdump_thread_function_cyclic(void *arg) {
 
 		while (buf_ready == FALSE) {
 			pthread_testcancel();
-			if (page_flag_buf->ready == FLAG_READY)
+			pthread_mutex_lock(&page_flag_buf->mutex);
+			if (page_flag_buf->ready == FLAG_READY) {
+				pthread_mutex_unlock(&page_flag_buf->mutex);
 				continue;
+			}
 
-			/* get next pfn */
+			/* get next dumpable pfn */
 			pthread_mutex_lock(&info->current_pfn_mutex);
 			pfn = info->current_pfn;
-			info->current_pfn++;
-			page_flag_buf->ready = FLAG_FILLING;
-			pthread_mutex_unlock(&info->current_pfn_mutex);
-
-			page_flag_buf->pfn = pfn;
+			while(1) {
+				if (pfn >= kdump_thread_args->end_pfn) {
+					pthread_mutex_unlock(&info->current_pfn_mutex);
+					page_flag_buf->pfn = pfn;
+					page_flag_buf->ready = FLAG_READY;
+					pthread_mutex_unlock(&page_flag_buf->mutex);
+					page_data_buf[index].used = FALSE;
+					pthread_mutex_unlock(&page_data_buf[index].mutex);
+					goto finish;
+				}
+				dumpable = is_dumpable(
+					info->fd_bitmap ? &bitmap_parallel : info->bitmap2,
+					pfn, cycle);
+				if (dumpable)
+					break;
 
-			if (pfn >= kdump_thread_args->end_pfn) {
-				page_data_buf[index].used = FALSE;
-				page_flag_buf->ready = FLAG_READY;
-				break;
+				pfn++;
 			}
-
-			dumpable = is_dumpable(
-				info->fd_bitmap ? &bitmap_parallel : info->bitmap2,
-				pfn,
-				cycle);
-			if (!dumpable)
-				continue;
+			info->current_pfn = pfn + 1;
+			pthread_mutex_unlock(&info->current_pfn_mutex);
+			page_flag_buf->pfn = pfn;
+			page_flag_buf->ready = FLAG_FILLING;
 
 			if (!read_pfn_parallel(fd_memory, pfn, buf,
 					       &bitmap_memory_parallel,
-					       mmap_cache))
+					       mmap_cache)) {
+					pthread_mutex_unlock(&page_flag_buf->mutex);
+					page_data_buf[index].used = FALSE;
+					pthread_mutex_unlock(&page_data_buf[index].mutex);
 					goto fail;
+			}
 
 			filter_data_buffer_parallel(buf, pfn_to_paddr(pfn),
 							info->page_size,
@@ -7626,12 +7637,12 @@ kdump_thread_function_cyclic(void *arg) {
 			buf_ready = TRUE;
 next:
 			page_flag_buf->ready = FLAG_READY;
+			pthread_mutex_unlock(&page_flag_buf->mutex);
 			page_flag_buf = page_flag_buf->next;
-
 		}
-
 		pthread_mutex_unlock(&page_data_buf[index].mutex);
 	}
+finish:
 	retval = NULL;
 
 fail:
@@ -7658,15 +7669,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 	mdf_pfn_t start_pfn, end_pfn;
 	struct page_desc pd;
 	struct timeval tv_start;
-	struct timeval last, new;
 	pthread_t **threads = NULL;
 	struct thread_args *kdump_thread_args = NULL;
 	void *thread_result;
 	int page_buf_num;
 	struct page_data *page_data_buf = NULL;
+	struct page_flag *page_flag_buf = NULL;
 	int i;
 	int index;
-	int end_count, consuming, check_count;
+	int end_count, consuming, check_count, prevlocked;
 	mdf_pfn_t current_pfn, temp_pfn;
 
 	if (info->flag_elf_dumpfile)
@@ -7728,6 +7739,19 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 	}
 
 	for (i = 0; i < info->num_threads; i++) {
+		for (page_flag_buf = info->page_flag_buf[i];
+		     page_flag_buf->next != info->page_flag_buf[i];
+		     page_flag_buf = page_flag_buf->next) {
+			page_flag_buf->ready = FLAG_UNUSED;
+			page_flag_buf->pfn   = start_pfn;
+			res = pthread_mutex_init(&page_flag_buf->mutex, NULL);
+			if (res != 0) {
+				ERRMSG("Can't initialize mutex of page_flag_buf. %s\n",
+						strerror(res));
+				goto out;
+			}
+		}
+
 		kdump_thread_args[i].thread_num = i;
 		kdump_thread_args[i].len_buf_out = len_buf_out;
 		kdump_thread_args[i].start_pfn = start_pfn;
@@ -7749,9 +7773,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 
 	end_count = 0;
 	while (1) {
-		consuming = 0;
-		check_count = 0;
-
 		/*
 		 * The basic idea is producer producing page and consumer writing page.
 		 * Each producer have a page_flag_buf list which is used for storing page's description.
@@ -7760,8 +7781,9 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 		 * The main thread is the consumer. It will find the next pfn and write it into file.
 		 * The next pfn is smallest pfn in all page_flag_buf.
 		 */
-		gettimeofday(&last, NULL);
-		while (1) {
+		do {
+			consuming = prevlocked = -1;
+			check_count = 0;
 			current_pfn = end_pfn;
 
 			/*
@@ -7772,32 +7794,36 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 			 * current_pfn is used for recording the value of pfn when checking the pfn.
 			 */
 			for (i = 0; i < info->num_threads; i++) {
+				pthread_mutex_lock(&info->page_flag_buf[i]->mutex);
 				if (info->page_flag_buf[i]->ready == FLAG_UNUSED)
-					continue;
+					goto next;
 				temp_pfn = info->page_flag_buf[i]->pfn;
 
 				/*
 				 * count how many threads have reached the end.
 				 */
 				if (temp_pfn >= end_pfn) {
-					/*
-					 * prevent setting FLAG_UNUSED being optimized.
-					 */
-					MSG("-");
-
 					info->page_flag_buf[i]->ready = FLAG_UNUSED;
-
-					info->current_pfn = end_pfn;
 					end_count++;
-					continue;
+					goto next;
 				}
 
 				if (current_pfn < temp_pfn)
-					continue;
+					goto next;
 
 				check_count++;
+				prevlocked = consuming;
 				consuming = i;
 				current_pfn = temp_pfn;
+next:
+				/*
+				 * Keep mutex of page_flag which has minimam pfn
+				 */
+				if (consuming != i) {            // unlock page_flag which is not minimum pfn
+					pthread_mutex_unlock(&info->page_flag_buf[i]->mutex);
+				} else if (prevlocked != -1) {   // unlock page_flag which previously locked
+					pthread_mutex_unlock(&info->page_flag_buf[prevlocked]->mutex);
+				}
 			}
 
 			/*
@@ -7805,40 +7831,18 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 			 */
 			if (end_count >= info->num_threads)
 				goto finish;
-
-			/*
-			 * Since it has the probabilty that there is no page_flag_buf being ready,
-			 * we should recheck if it happens.
-			 */
-			if (check_count == 0)
-				continue;
-
-			/*
-			 * If the page_flag_buf is not ready, the pfn recorded may be changed.
-			 * So we should recheck.
-			 */
-			if (info->page_flag_buf[consuming]->ready != FLAG_READY) {
-				gettimeofday(&new, NULL);
-				if (new.tv_sec - last.tv_sec > WAIT_TIME) {
-					ERRMSG("Can't get data of pfn.\n");
-					goto out;
-				}
-				continue;
-			}
-
-			if (current_pfn == info->page_flag_buf[consuming]->pfn)
-				break;
-		}
+		} while (check_count == 0);
 
 		if ((num_dumped % per) == 0)
 			print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable);
 
 		num_dumped++;
 
-
 		if (info->page_flag_buf[consuming]->zero == TRUE) {
-			if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t)))
+			if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) {
+				pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex);
 				goto out;
+			}
 			pfn_zero++;
 		} else {
 			index = info->page_flag_buf[consuming]->index;
@@ -7850,16 +7854,21 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 			/*
 			 * Write the page header.
 			 */
-			if (!write_cache(cd_header, &pd, sizeof(page_desc_t)))
+			if (!write_cache(cd_header, &pd, sizeof(page_desc_t))) {
+				pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex);
 				goto out;
+			}
 			/*
 			 * Write the page data.
 			 */
-			if (!write_cache(cd_page, page_data_buf[index].buf, pd.size))
+			if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) {
+				pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex);
 				goto out;
+			}
 			page_data_buf[index].used = FALSE;
 		}
 		info->page_flag_buf[consuming]->ready = FLAG_UNUSED;
+		pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex);
 		info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next;
 	}
 finish:
@@ -7906,6 +7915,14 @@ out:
 		}
 	}
 
+	for (i = 0; i < info->num_threads; i++) {
+		for (page_flag_buf = info->page_flag_buf[i];
+		     page_flag_buf->next != info->page_flag_buf[i];
+		     page_flag_buf = page_flag_buf->next) {
+			pthread_mutex_destroy(&page_flag_buf->mutex);
+		}
+	}
+
 	pthread_rwlock_destroy(&info->usemmap_rwlock);
 	pthread_mutex_destroy(&info->filter_mutex);
 	pthread_mutex_destroy(&info->consumed_pfn_mutex);
diff --git a/makedumpfile.h b/makedumpfile.h
index 80ce23a..188ec17 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -971,7 +971,6 @@ typedef unsigned long long int ulonglong;
  */
 
 #define PAGE_DATA_NUM	(50)
-#define WAIT_TIME	(60 * 10)
 #define PTHREAD_FAIL	((void *)-2)
 #define NUM_BUFFERS	(20)
 
@@ -987,6 +986,7 @@ enum {
 	FLAG_FILLING
 };
 struct page_flag {
+	pthread_mutex_t mutex;
 	mdf_pfn_t pfn;
 	char zero;
 	char ready;
===


Thanks
Minoru Usui


More information about the kexec mailing list