[PATCH v2] Improve the performance of --num-threads -d 31
Minoru Usui
min-usui at ti.jp.nec.com
Thu Mar 3 16:59:22 PST 2016
Hi, Zhou, Minfei
> -----Original Message-----
> From: kexec [mailto:kexec-bounces at lists.infradead.org] On Behalf Of Minfei Huang
> Sent: Wednesday, February 24, 2016 11:25 AM
> To: "Zhou, Wenjian/周文剑" <zhouwj-fnst at cn.fujitsu.com>
> Cc: kexec at lists.infradead.org
> Subject: Re: [PATCH v2] Improve the performance of --num-threads -d 31
>
>
> > On Feb 24, 2016, at 10:20, Zhou, Wenjian/周文剑 <zhouwj-fnst at cn.fujitsu.com> wrote:
> >
> > Hi,
> >
> > On 02/24/2016 09:43 AM, Minfei Huang wrote:
> >> On 02/23/16 at 01:47pm, "Zhou, Wenjian/周文剑" wrote:
> >>> Hello, Minfei,
> >>>
> >>> Does it occur every time?
> >>> If not, I think I have known the reason.
> >>
> >> Hi, Wenjian.
> >>
> >> This patch is applied directly on version 1.5.9. And makedumpfile hangs
> >> if option num-thread is appended.
> >>
> >
> > I see.
> > I'm working on it.
> >
> > BTW, did you only test it with --num-threads 128?
> > How is it with --num-threads 32(or smaller value)?
>
> Yes. I have tested with num-thread 1, 2, 8 and 32. Except for --num-threads 1,
> all of the tests fail.
I wrote an RFC patch which fixes this problem.
This patch is an incremental one on top of Zhou's original v2 patch.
I think the problem was caused by a race on page_flag_buf between the consumer and the producers.
So, I added a mutex to each page_flag_buf.
The change summary is as follows.
* Add a mutex to each page_flag_buf.
* Producer:
- Hold the page_flag_buf mutex until the producer finishes writing all data to page_flag_buf.
- Fix the issue that info->current_pfn becomes larger than end_pfn.
- Hold info->current_pfn_mutex until the producer gets a dumpable pfn.
* Consumer:
- Hold the page_flag_buf mutex until the consumer finishes writing out the page described by page_flag_buf and marks it FLAG_UNUSED.
The results of my test with a 5GB dumpfile are as follows.
===
Common Option: -c --cyclic-buffer=10 --num-threads
[-d0]
num-thread real vs num-threads 0
----------------------------------------
0 209.621 100.0%
1 217.921 104.0%
2 84.539 40.3%
4 51.787 24.7%
8 37.260 17.8%
[-d31]
num-thread real vs num-threads 0
----------------------------------------
0 11.492 100.0%
1 8.572 74.6%
2 4.928 42.9%
4 3.115 27.1%
8 2.259 19.7%
===
How about this approach?
Could you test this?
===
diff --git a/makedumpfile.c b/makedumpfile.c
index 8a0c636..c697f93 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -7521,7 +7521,7 @@ kdump_thread_function_cyclic(void *arg) {
* unfiltered zero page will only take a page_flag_buf
* unfiltered non-zero page will take a page_flag_buf and a page_data_buf
*/
- while (pfn < kdump_thread_args->end_pfn) {
+ while (1) {
buf_ready = FALSE;
while (page_data_buf[index].used != FALSE ||
@@ -7532,35 +7532,46 @@ kdump_thread_function_cyclic(void *arg) {
while (buf_ready == FALSE) {
pthread_testcancel();
- if (page_flag_buf->ready == FLAG_READY)
+ pthread_mutex_lock(&page_flag_buf->mutex);
+ if (page_flag_buf->ready == FLAG_READY) {
+ pthread_mutex_unlock(&page_flag_buf->mutex);
continue;
+ }
- /* get next pfn */
+ /* get next dumpable pfn */
pthread_mutex_lock(&info->current_pfn_mutex);
pfn = info->current_pfn;
- info->current_pfn++;
- page_flag_buf->ready = FLAG_FILLING;
- pthread_mutex_unlock(&info->current_pfn_mutex);
-
- page_flag_buf->pfn = pfn;
+ while(1) {
+ if (pfn >= kdump_thread_args->end_pfn) {
+ pthread_mutex_unlock(&info->current_pfn_mutex);
+ page_flag_buf->pfn = pfn;
+ page_flag_buf->ready = FLAG_READY;
+ pthread_mutex_unlock(&page_flag_buf->mutex);
+ page_data_buf[index].used = FALSE;
+ pthread_mutex_unlock(&page_data_buf[index].mutex);
+ goto finish;
+ }
+ dumpable = is_dumpable(
+ info->fd_bitmap ? &bitmap_parallel : info->bitmap2,
+ pfn, cycle);
+ if (dumpable)
+ break;
- if (pfn >= kdump_thread_args->end_pfn) {
- page_data_buf[index].used = FALSE;
- page_flag_buf->ready = FLAG_READY;
- break;
+ pfn++;
}
-
- dumpable = is_dumpable(
- info->fd_bitmap ? &bitmap_parallel : info->bitmap2,
- pfn,
- cycle);
- if (!dumpable)
- continue;
+ info->current_pfn = pfn + 1;
+ pthread_mutex_unlock(&info->current_pfn_mutex);
+ page_flag_buf->pfn = pfn;
+ page_flag_buf->ready = FLAG_FILLING;
if (!read_pfn_parallel(fd_memory, pfn, buf,
&bitmap_memory_parallel,
- mmap_cache))
+ mmap_cache)) {
+ pthread_mutex_unlock(&page_flag_buf->mutex);
+ page_data_buf[index].used = FALSE;
+ pthread_mutex_unlock(&page_data_buf[index].mutex);
goto fail;
+ }
filter_data_buffer_parallel(buf, pfn_to_paddr(pfn),
info->page_size,
@@ -7626,12 +7637,12 @@ kdump_thread_function_cyclic(void *arg) {
buf_ready = TRUE;
next:
page_flag_buf->ready = FLAG_READY;
+ pthread_mutex_unlock(&page_flag_buf->mutex);
page_flag_buf = page_flag_buf->next;
-
}
-
pthread_mutex_unlock(&page_data_buf[index].mutex);
}
+finish:
retval = NULL;
fail:
@@ -7658,15 +7669,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
mdf_pfn_t start_pfn, end_pfn;
struct page_desc pd;
struct timeval tv_start;
- struct timeval last, new;
pthread_t **threads = NULL;
struct thread_args *kdump_thread_args = NULL;
void *thread_result;
int page_buf_num;
struct page_data *page_data_buf = NULL;
+ struct page_flag *page_flag_buf = NULL;
int i;
int index;
- int end_count, consuming, check_count;
+ int end_count, consuming, check_count, prevlocked;
mdf_pfn_t current_pfn, temp_pfn;
if (info->flag_elf_dumpfile)
@@ -7728,6 +7739,19 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
}
for (i = 0; i < info->num_threads; i++) {
+ for (page_flag_buf = info->page_flag_buf[i];
+ page_flag_buf->next != info->page_flag_buf[i];
+ page_flag_buf = page_flag_buf->next) {
+ page_flag_buf->ready = FLAG_UNUSED;
+ page_flag_buf->pfn = start_pfn;
+ res = pthread_mutex_init(&page_flag_buf->mutex, NULL);
+ if (res != 0) {
+ ERRMSG("Can't initialize mutex of page_flag_buf. %s\n",
+ strerror(res));
+ goto out;
+ }
+ }
+
kdump_thread_args[i].thread_num = i;
kdump_thread_args[i].len_buf_out = len_buf_out;
kdump_thread_args[i].start_pfn = start_pfn;
@@ -7749,9 +7773,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
end_count = 0;
while (1) {
- consuming = 0;
- check_count = 0;
-
/*
* The basic idea is producer producing page and consumer writing page.
* Each producer have a page_flag_buf list which is used for storing page's description.
@@ -7760,8 +7781,9 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
* The main thread is the consumer. It will find the next pfn and write it into file.
* The next pfn is smallest pfn in all page_flag_buf.
*/
- gettimeofday(&last, NULL);
- while (1) {
+ do {
+ consuming = prevlocked = -1;
+ check_count = 0;
current_pfn = end_pfn;
/*
@@ -7772,32 +7794,36 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
* current_pfn is used for recording the value of pfn when checking the pfn.
*/
for (i = 0; i < info->num_threads; i++) {
+ pthread_mutex_lock(&info->page_flag_buf[i]->mutex);
if (info->page_flag_buf[i]->ready == FLAG_UNUSED)
- continue;
+ goto next;
temp_pfn = info->page_flag_buf[i]->pfn;
/*
* count how many threads have reached the end.
*/
if (temp_pfn >= end_pfn) {
- /*
- * prevent setting FLAG_UNUSED being optimized.
- */
- MSG("-");
-
info->page_flag_buf[i]->ready = FLAG_UNUSED;
-
- info->current_pfn = end_pfn;
end_count++;
- continue;
+ goto next;
}
if (current_pfn < temp_pfn)
- continue;
+ goto next;
check_count++;
+ prevlocked = consuming;
consuming = i;
current_pfn = temp_pfn;
+next:
+ /*
+ * Keep mutex of page_flag which has minimam pfn
+ */
+ if (consuming != i) { // unlock page_flag which is not minimum pfn
+ pthread_mutex_unlock(&info->page_flag_buf[i]->mutex);
+ } else if (prevlocked != -1) { // unlock page_flag which previously locked
+ pthread_mutex_unlock(&info->page_flag_buf[prevlocked]->mutex);
+ }
}
/*
@@ -7805,40 +7831,18 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
*/
if (end_count >= info->num_threads)
goto finish;
-
- /*
- * Since it has the probabilty that there is no page_flag_buf being ready,
- * we should recheck if it happens.
- */
- if (check_count == 0)
- continue;
-
- /*
- * If the page_flag_buf is not ready, the pfn recorded may be changed.
- * So we should recheck.
- */
- if (info->page_flag_buf[consuming]->ready != FLAG_READY) {
- gettimeofday(&new, NULL);
- if (new.tv_sec - last.tv_sec > WAIT_TIME) {
- ERRMSG("Can't get data of pfn.\n");
- goto out;
- }
- continue;
- }
-
- if (current_pfn == info->page_flag_buf[consuming]->pfn)
- break;
- }
+ } while (check_count == 0);
if ((num_dumped % per) == 0)
print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable);
num_dumped++;
-
if (info->page_flag_buf[consuming]->zero == TRUE) {
- if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t)))
+ if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) {
+ pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex);
goto out;
+ }
pfn_zero++;
} else {
index = info->page_flag_buf[consuming]->index;
@@ -7850,16 +7854,21 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
/*
* Write the page header.
*/
- if (!write_cache(cd_header, &pd, sizeof(page_desc_t)))
+ if (!write_cache(cd_header, &pd, sizeof(page_desc_t))) {
+ pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex);
goto out;
+ }
/*
* Write the page data.
*/
- if (!write_cache(cd_page, page_data_buf[index].buf, pd.size))
+ if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) {
+ pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex);
goto out;
+ }
page_data_buf[index].used = FALSE;
}
info->page_flag_buf[consuming]->ready = FLAG_UNUSED;
+ pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex);
info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next;
}
finish:
@@ -7906,6 +7915,14 @@ out:
}
}
+ for (i = 0; i < info->num_threads; i++) {
+ for (page_flag_buf = info->page_flag_buf[i];
+ page_flag_buf->next != info->page_flag_buf[i];
+ page_flag_buf = page_flag_buf->next) {
+ pthread_mutex_destroy(&page_flag_buf->mutex);
+ }
+ }
+
pthread_rwlock_destroy(&info->usemmap_rwlock);
pthread_mutex_destroy(&info->filter_mutex);
pthread_mutex_destroy(&info->consumed_pfn_mutex);
diff --git a/makedumpfile.h b/makedumpfile.h
index 80ce23a..188ec17 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -971,7 +971,6 @@ typedef unsigned long long int ulonglong;
*/
#define PAGE_DATA_NUM (50)
-#define WAIT_TIME (60 * 10)
#define PTHREAD_FAIL ((void *)-2)
#define NUM_BUFFERS (20)
@@ -987,6 +986,7 @@ enum {
FLAG_FILLING
};
struct page_flag {
+ pthread_mutex_t mutex;
mdf_pfn_t pfn;
char zero;
char ready;
===
Thanks
Minoru Usui
More information about the kexec
mailing list