[PATCH v2] Improve the performance of --num-threads -d 31
Minfei Huang
mhuang at redhat.com
Thu Mar 3 20:17:56 PST 2016
Hi, Minoru.
Sure. I will test this patch.
Thanks
Minfei
On 03/04/16 at 12:59am, Minoru Usui wrote:
> Hi, Zhou, Minfei
>
> > -----Original Message-----
> > From: kexec [mailto:kexec-bounces at lists.infradead.org] On Behalf Of Minfei Huang
> > Sent: Wednesday, February 24, 2016 11:25 AM
> > To: "Zhou, Wenjian/周文剑" <zhouwj-fnst at cn.fujitsu.com>
> > Cc: kexec at lists.infradead.org
> > Subject: Re: [PATCH v2] Improve the performance of --num-threads -d 31
> >
> >
> > > On Feb 24, 2016, at 10:20, Zhou, Wenjian/周文剑 <zhouwj-fnst at cn.fujitsu.com> wrote:
> > >
> > > Hi,
> > >
> > > On 02/24/2016 09:43 AM, Minfei Huang wrote:
> > >> On 02/23/16 at 01:47pm, "Zhou, Wenjian/周文剑" wrote:
> > >>> Hello, Minfei,
> > >>>
> > >>> Does it occur every time?
> > >>> If not, I think I have known the reason.
> > >>
> > >> Hi, Wenjian.
> > >>
> > >> This patch is applied directly on version 1.5.9. And makedumpfile hangs
> > >> if option num-thread is appended.
> > >>
> > >
> > > I see.
> > > I'm working on it.
> > >
> > > BTW, did you only test it with --num-threads 128?
> > > How is it with --num-threads 32(or smaller value)?
> >
> > Yes. I have tested with --num-threads 1, 2, 8, and 32. Except for --num-threads 1,
> > all of the tests fail.
>
> I wrote an RFC patch that fixes this problem.
> It is an incremental patch on top of Zhou's original v2 patch.
>
> I think the problem was caused by a race on page_flag_buf between the consumer and the producers,
> so I added a mutex to each page_flag_buf.
> The change summary is as follows:
>
> * Add a mutex to each page_flag_buf (see the sketch after this summary).
> * Producer:
> - Hold the page_flag_buf mutex until the producer has finished writing all data to the page_flag_buf.
> - Fix the issue that info->current_pfn can become larger than end_pfn.
> - Hold info->current_pfn_mutex until the producer has found a dumpable pfn.
> * Consumer:
> - Hold the page_flag_buf mutex until the consumer has finished writing out the data in the page_flag_buf.
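>
> To make the locking concrete, here is a minimal, self-contained sketch of
> the per-buffer mutex pattern (compile with gcc -pthread). The names
> (struct slot, NSLOT, producer) are hypothetical and only illustrate the
> technique; this is not the makedumpfile code:
>
> ===
> #include <pthread.h>
> #include <stdio.h>
>
> #define NSLOT 4
>
> struct slot {
> 	pthread_mutex_t mutex;	/* protects every field below */
> 	int ready;		/* 0 = unused, 1 = filled */
> 	unsigned long pfn;
> };
>
> static struct slot slots[NSLOT];
>
> /* Producer: fill each slot while holding its mutex, so the consumer
>  * can never observe a half-written slot. */
> static void *producer(void *arg)
> {
> 	unsigned long pfn;
>
> 	(void)arg;
> 	for (pfn = 0; pfn < NSLOT; pfn++) {
> 		struct slot *s = &slots[pfn % NSLOT];
>
> 		pthread_mutex_lock(&s->mutex);
> 		s->pfn = pfn;
> 		s->ready = 1;	/* publish only after all fields are set */
> 		pthread_mutex_unlock(&s->mutex);
> 	}
> 	return NULL;
> }
>
> int main(void)
> {
> 	pthread_t tid;
> 	int i, done = 0;
>
> 	for (i = 0; i < NSLOT; i++)
> 		pthread_mutex_init(&slots[i].mutex, NULL);
> 	pthread_create(&tid, NULL, producer, NULL);
>
> 	/* Consumer: take the same per-slot mutex before reading, then
> 	 * mark the slot unused again. */
> 	while (done < NSLOT) {
> 		for (i = 0; i < NSLOT; i++) {
> 			pthread_mutex_lock(&slots[i].mutex);
> 			if (slots[i].ready) {
> 				printf("consumed pfn %lu\n", slots[i].pfn);
> 				slots[i].ready = 0;
> 				done++;
> 			}
> 			pthread_mutex_unlock(&slots[i].mutex);
> 		}
> 	}
> 	pthread_join(tid, NULL);
> 	return 0;
> }
> ===
>
> The key point is that ready is only set while the slot's own mutex is held,
> which is what the patch below does with FLAG_READY and page_flag_buf->mutex.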
>
> The results of my test with a 5GB dumpfile are as follows.
>
> ===
> Common Option: -c --cyclic-buffer=10 --num-threads
>
> [-d0]
> num-threads   real (s)   vs. num-threads 0
> ------------------------------------------
>      0         209.621        100.0%
>      1         217.921        104.0%
>      2          84.539         40.3%
>      4          51.787         24.7%
>      8          37.260         17.8%
>
> [-d31]
> num-threads   real (s)   vs. num-threads 0
> ------------------------------------------
>      0          11.492        100.0%
>      1           8.572         74.6%
>      2           4.928         42.9%
>      4           3.115         27.1%
>      8           2.259         19.7%
> ===
>
> How about this approach?
> Could you test this?
>
> ===
> diff --git a/makedumpfile.c b/makedumpfile.c
> index 8a0c636..c697f93 100644
> --- a/makedumpfile.c
> +++ b/makedumpfile.c
> @@ -7521,7 +7521,7 @@ kdump_thread_function_cyclic(void *arg) {
> * unfiltered zero page will only take a page_flag_buf
> * unfiltered non-zero page will take a page_flag_buf and a page_data_buf
> */
> - while (pfn < kdump_thread_args->end_pfn) {
> + while (1) {
> buf_ready = FALSE;
>
> while (page_data_buf[index].used != FALSE ||
> @@ -7532,35 +7532,46 @@ kdump_thread_function_cyclic(void *arg) {
>
> while (buf_ready == FALSE) {
> pthread_testcancel();
> - if (page_flag_buf->ready == FLAG_READY)
> + pthread_mutex_lock(&page_flag_buf->mutex);
> + if (page_flag_buf->ready == FLAG_READY) {
> + pthread_mutex_unlock(&page_flag_buf->mutex);
> continue;
> + }
>
> - /* get next pfn */
> + /* get next dumpable pfn */
> pthread_mutex_lock(&info->current_pfn_mutex);
> pfn = info->current_pfn;
> - info->current_pfn++;
> - page_flag_buf->ready = FLAG_FILLING;
> - pthread_mutex_unlock(&info->current_pfn_mutex);
> -
> - page_flag_buf->pfn = pfn;
> + while(1) {
> + if (pfn >= kdump_thread_args->end_pfn) {
> + pthread_mutex_unlock(&info->current_pfn_mutex);
> + page_flag_buf->pfn = pfn;
> + page_flag_buf->ready = FLAG_READY;
> + pthread_mutex_unlock(&page_flag_buf->mutex);
> + page_data_buf[index].used = FALSE;
> + pthread_mutex_unlock(&page_data_buf[index].mutex);
> + goto finish;
> + }
> + dumpable = is_dumpable(
> + info->fd_bitmap ? &bitmap_parallel : info->bitmap2,
> + pfn, cycle);
> + if (dumpable)
> + break;
>
> - if (pfn >= kdump_thread_args->end_pfn) {
> - page_data_buf[index].used = FALSE;
> - page_flag_buf->ready = FLAG_READY;
> - break;
> + pfn++;
> }
> -
> - dumpable = is_dumpable(
> - info->fd_bitmap ? &bitmap_parallel : info->bitmap2,
> - pfn,
> - cycle);
> - if (!dumpable)
> - continue;
> + info->current_pfn = pfn + 1;
> + pthread_mutex_unlock(&info->current_pfn_mutex);
> + page_flag_buf->pfn = pfn;
> + page_flag_buf->ready = FLAG_FILLING;
>
> if (!read_pfn_parallel(fd_memory, pfn, buf,
> &bitmap_memory_parallel,
> - mmap_cache))
> + mmap_cache)) {
> + pthread_mutex_unlock(&page_flag_buf->mutex);
> + page_data_buf[index].used = FALSE;
> + pthread_mutex_unlock(&page_data_buf[index].mutex);
> goto fail;
> + }
>
> filter_data_buffer_parallel(buf, pfn_to_paddr(pfn),
> info->page_size,
> @@ -7626,12 +7637,12 @@ kdump_thread_function_cyclic(void *arg) {
> buf_ready = TRUE;
> next:
> page_flag_buf->ready = FLAG_READY;
> + pthread_mutex_unlock(&page_flag_buf->mutex);
> page_flag_buf = page_flag_buf->next;
> -
> }
> -
> pthread_mutex_unlock(&page_data_buf[index].mutex);
> }
> +finish:
> retval = NULL;
>
> fail:
> @@ -7658,15 +7669,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
> mdf_pfn_t start_pfn, end_pfn;
> struct page_desc pd;
> struct timeval tv_start;
> - struct timeval last, new;
> pthread_t **threads = NULL;
> struct thread_args *kdump_thread_args = NULL;
> void *thread_result;
> int page_buf_num;
> struct page_data *page_data_buf = NULL;
> + struct page_flag *page_flag_buf = NULL;
> int i;
> int index;
> - int end_count, consuming, check_count;
> + int end_count, consuming, check_count, prevlocked;
> mdf_pfn_t current_pfn, temp_pfn;
>
> if (info->flag_elf_dumpfile)
> @@ -7728,6 +7739,19 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
> }
>
> for (i = 0; i < info->num_threads; i++) {
> + for (page_flag_buf = info->page_flag_buf[i];
> + page_flag_buf->next != info->page_flag_buf[i];
> + page_flag_buf = page_flag_buf->next) {
> + page_flag_buf->ready = FLAG_UNUSED;
> + page_flag_buf->pfn = start_pfn;
> + res = pthread_mutex_init(&page_flag_buf->mutex, NULL);
> + if (res != 0) {
> + ERRMSG("Can't initialize mutex of page_flag_buf. %s\n",
> + strerror(res));
> + goto out;
> + }
> + }
> +
> kdump_thread_args[i].thread_num = i;
> kdump_thread_args[i].len_buf_out = len_buf_out;
> kdump_thread_args[i].start_pfn = start_pfn;
> @@ -7749,9 +7773,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>
> end_count = 0;
> while (1) {
> - consuming = 0;
> - check_count = 0;
> -
> /*
> * The basic idea is producer producing page and consumer writing page.
> * Each producer have a page_flag_buf list which is used for storing page's description.
> @@ -7760,8 +7781,9 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
> * The main thread is the consumer. It will find the next pfn and write it into file.
> * The next pfn is smallest pfn in all page_flag_buf.
> */
> - gettimeofday(&last, NULL);
> - while (1) {
> + do {
> + consuming = prevlocked = -1;
> + check_count = 0;
> current_pfn = end_pfn;
>
> /*
> @@ -7772,32 +7794,36 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
> * current_pfn is used for recording the value of pfn when checking the pfn.
> */
> for (i = 0; i < info->num_threads; i++) {
> + pthread_mutex_lock(&info->page_flag_buf[i]->mutex);
> if (info->page_flag_buf[i]->ready == FLAG_UNUSED)
> - continue;
> + goto next;
> temp_pfn = info->page_flag_buf[i]->pfn;
>
> /*
> * count how many threads have reached the end.
> */
> if (temp_pfn >= end_pfn) {
> - /*
> - * prevent setting FLAG_UNUSED being optimized.
> - */
> - MSG("-");
> -
> info->page_flag_buf[i]->ready = FLAG_UNUSED;
> -
> - info->current_pfn = end_pfn;
> end_count++;
> - continue;
> + goto next;
> }
>
> if (current_pfn < temp_pfn)
> - continue;
> + goto next;
>
> check_count++;
> + prevlocked = consuming;
> consuming = i;
> current_pfn = temp_pfn;
> +next:
> + /*
> +			 * Keep the mutex of the page_flag which has the minimum pfn
> + */
> + if (consuming != i) { // unlock page_flag which is not minimum pfn
> + pthread_mutex_unlock(&info->page_flag_buf[i]->mutex);
> + } else if (prevlocked != -1) { // unlock page_flag which previously locked
> + pthread_mutex_unlock(&info->page_flag_buf[prevlocked]->mutex);
> + }
> }
>
> /*
> @@ -7805,40 +7831,18 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
> */
> if (end_count >= info->num_threads)
> goto finish;
> -
> - /*
> - * Since it has the probabilty that there is no page_flag_buf being ready,
> - * we should recheck if it happens.
> - */
> - if (check_count == 0)
> - continue;
> -
> - /*
> - * If the page_flag_buf is not ready, the pfn recorded may be changed.
> - * So we should recheck.
> - */
> - if (info->page_flag_buf[consuming]->ready != FLAG_READY) {
> - gettimeofday(&new, NULL);
> - if (new.tv_sec - last.tv_sec > WAIT_TIME) {
> - ERRMSG("Can't get data of pfn.\n");
> - goto out;
> - }
> - continue;
> - }
> -
> - if (current_pfn == info->page_flag_buf[consuming]->pfn)
> - break;
> - }
> + } while (check_count == 0);
>
> if ((num_dumped % per) == 0)
> print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable);
>
> num_dumped++;
>
> -
> if (info->page_flag_buf[consuming]->zero == TRUE) {
> - if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t)))
> + if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) {
> + pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex);
> goto out;
> + }
> pfn_zero++;
> } else {
> index = info->page_flag_buf[consuming]->index;
> @@ -7850,16 +7854,21 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
> /*
> * Write the page header.
> */
> - if (!write_cache(cd_header, &pd, sizeof(page_desc_t)))
> + if (!write_cache(cd_header, &pd, sizeof(page_desc_t))) {
> + pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex);
> goto out;
> + }
> /*
> * Write the page data.
> */
> - if (!write_cache(cd_page, page_data_buf[index].buf, pd.size))
> + if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) {
> + pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex);
> goto out;
> + }
> page_data_buf[index].used = FALSE;
> }
> info->page_flag_buf[consuming]->ready = FLAG_UNUSED;
> + pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex);
> info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next;
> }
> finish:
> @@ -7906,6 +7915,14 @@ out:
> }
> }
>
> + for (i = 0; i < info->num_threads; i++) {
> + for (page_flag_buf = info->page_flag_buf[i];
> + page_flag_buf->next != info->page_flag_buf[i];
> + page_flag_buf = page_flag_buf->next) {
> + pthread_mutex_destroy(&page_flag_buf->mutex);
> + }
> + }
> +
> pthread_rwlock_destroy(&info->usemmap_rwlock);
> pthread_mutex_destroy(&info->filter_mutex);
> pthread_mutex_destroy(&info->consumed_pfn_mutex);
> diff --git a/makedumpfile.h b/makedumpfile.h
> index 80ce23a..188ec17 100644
> --- a/makedumpfile.h
> +++ b/makedumpfile.h
> @@ -971,7 +971,6 @@ typedef unsigned long long int ulonglong;
> */
>
> #define PAGE_DATA_NUM (50)
> -#define WAIT_TIME (60 * 10)
> #define PTHREAD_FAIL ((void *)-2)
> #define NUM_BUFFERS (20)
>
> @@ -987,6 +986,7 @@ enum {
> FLAG_FILLING
> };
> struct page_flag {
> + pthread_mutex_t mutex;
> mdf_pfn_t pfn;
> char zero;
> char ready;
> ===
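>
> For reference, the consumer-side scan in the hunk above can be summarized
> by this stand-alone sketch (compile with gcc -pthread; the names are
> simplified and hypothetical): lock each candidate buffer, remember the one
> with the smallest pfn, and release every other lock during the scan, so
> exactly one mutex is still held when the scan ends:
>
> ===
> #include <pthread.h>
> #include <stdio.h>
>
> #define NTHREADS 4
> #define FLAG_READY 1
>
> struct page_flag {
> 	pthread_mutex_t mutex;
> 	int ready;
> 	unsigned long pfn;
> };
>
> static struct page_flag bufs[NTHREADS];
>
> /* Return the index of the READY buffer holding the smallest pfn.
>  * Only that buffer's mutex is still held on return; -1 (and no held
>  * lock) if nothing is ready.  Each buffer is inspected under its own
>  * mutex, and at most one lock is retained at any time. */
> static int pick_min_pfn(void)
> {
> 	int i, consuming = -1;
>
> 	for (i = 0; i < NTHREADS; i++) {
> 		pthread_mutex_lock(&bufs[i].mutex);
> 		if (bufs[i].ready == FLAG_READY &&
> 		    (consuming == -1 || bufs[i].pfn < bufs[consuming].pfn)) {
> 			if (consuming != -1)	/* drop the previous minimum */
> 				pthread_mutex_unlock(&bufs[consuming].mutex);
> 			consuming = i;		/* keep this one locked */
> 		} else {
> 			pthread_mutex_unlock(&bufs[i].mutex);
> 		}
> 	}
> 	return consuming;
> }
>
> int main(void)
> {
> 	int i, idx;
>
> 	for (i = 0; i < NTHREADS; i++) {
> 		pthread_mutex_init(&bufs[i].mutex, NULL);
> 		bufs[i].ready = FLAG_READY;
> 		bufs[i].pfn = (unsigned long)(NTHREADS - i);	/* 4,3,2,1 */
> 	}
>
> 	idx = pick_min_pfn();
> 	if (idx != -1) {
> 		printf("smallest pfn %lu at index %d\n", bufs[idx].pfn, idx);
> 		pthread_mutex_unlock(&bufs[idx].mutex);
> 	}
> 	return 0;
> }
> ===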
>
>
> Thanks
> Minoru Usui