[PATCH] makedumpfile: Change num_threads to num_producers
Atsushi Kumagai
ats-kumagai at wm.jp.nec.com
Wed Aug 3 17:50:49 PDT 2016
Hello Zhou,
>Currently, num_threads means the producer number. The main thread
>is not included. However, num_threads is specified by the option
>"--num-threads". So this may make user confused why it still has
>performance degradation when the value by "--num-threads" equals
>the usable cpu number.
>
>I think it will be more nature to make "--num-threads" be producer
>number + 1. In other words, "--num-threads" should include the
>main thread.
The most part of cpu time will be spent for compression while
the main thread(Consumer) just does I/O work.
If the usable cpu number is 2 and --num-thread is also 2, your
patch reserves a whole cpu for the main thread and the compression
will be done by just one cpu. Is that really the multi thread
processing which we wanted ?
If there are 4 usable cpus, I guess 4 producers with the main thread
is faster than 3 producers with the main thread.
However, according to my quick test, there were no significant
differences between the two cases.
(This was measured on a small VM(5GB) without your patch, the number
of trials is 3, so just for information.)
# makedumpfile -c --num-thread [3|4]
| | time[s]
CPUs | -num-threads | 1st | 2nd | 3rd
-------+---------------+----------+----------+----------
4 | 3 | 29.54 30.19 28.26
4 | 4 | 29.80 31.75 28.19
Then, I noticed that the cpu usage of the main thread is almost 100%
while it doesn't need a lot of cpu resources in theory. Do you have
any idea why the main thread require a lot of cpu resources ?
If it's an unintended behavior, I believe the case of --num-thread=4
can be faster if the cpu usage of the main thread is reduced.
Thanks,
Atsushi Kumagai
>---
> makedumpfile.c | 108 ++++++++++++++++++++++++++++-----------------------------
> makedumpfile.h | 4 +--
> 2 files changed, 56 insertions(+), 56 deletions(-)
>
>diff --git a/makedumpfile.c b/makedumpfile.c
>index 21784e8..19019a4 100644
>--- a/makedumpfile.c
>+++ b/makedumpfile.c
>@@ -1407,13 +1407,13 @@ open_dump_bitmap(void)
> }
> }
>
>- if (info->num_threads) {
>+ if (info->num_producers) {
> /*
> * Reserve file descriptors of bitmap for creating dumpfiles
> * parallelly, because a bitmap file will be unlinked just after
> * this and it is not possible to open a bitmap file later.
> */
>- for (i = 0; i < info->num_threads; i++) {
>+ for (i = 0; i < info->num_producers; i++) {
> if ((fd = open(info->name_bitmap, O_RDONLY)) < 0) {
> ERRMSG("Can't open the bitmap file(%s). %s\n",
> info->name_bitmap, strerror(errno));
>@@ -3495,9 +3495,9 @@ initialize_bitmap_memory(void)
> }
>
> void
>-initialize_bitmap_memory_parallel(struct dump_bitmap *bitmap, int thread_num)
>+initialize_bitmap_memory_parallel(struct dump_bitmap *bitmap, int producer_num)
> {
>- bitmap->fd = FD_BITMAP_MEMORY_PARALLEL(thread_num);
>+ bitmap->fd = FD_BITMAP_MEMORY_PARALLEL(producer_num);
> bitmap->file_name = info->name_memory;
> bitmap->no_block = -1;
> memset(bitmap->buf, 0, BUFSIZE_BITMAP);
>@@ -3529,24 +3529,24 @@ initial_for_parallel()
> /*
> * allocate memory for threads
> */
>- if ((info->threads = malloc(sizeof(pthread_t *) * info->num_threads))
>+ if ((info->threads = malloc(sizeof(pthread_t *) * info->num_producers))
> == NULL) {
> MSG("Can't allocate memory for threads. %s\n",
> strerror(errno));
> return FALSE;
> }
>- memset(info->threads, 0, sizeof(pthread_t *) * info->num_threads);
>+ memset(info->threads, 0, sizeof(pthread_t *) * info->num_producers);
>
> if ((info->kdump_thread_args =
>- malloc(sizeof(struct thread_args) * info->num_threads))
>+ malloc(sizeof(struct thread_args) * info->num_producers))
> == NULL) {
> MSG("Can't allocate memory for arguments of threads. %s\n",
> strerror(errno));
> return FALSE;
> }
>- memset(info->kdump_thread_args, 0, sizeof(struct thread_args) * info->num_threads);
>+ memset(info->kdump_thread_args, 0, sizeof(struct thread_args) * info->num_producers);
>
>- for (i = 0; i < info->num_threads; i++) {
>+ for (i = 0; i < info->num_producers; i++) {
> if ((info->threads[i] = malloc(sizeof(pthread_t))) == NULL) {
> MSG("Can't allocate memory for thread %d. %s",
> i, strerror(errno));
>@@ -3592,7 +3592,7 @@ initial_for_parallel()
> #endif
> }
>
>- info->num_buffers = PAGE_DATA_NUM * info->num_threads;
>+ info->num_buffers = PAGE_DATA_NUM * info->num_producers;
>
> /*
> * allocate memory for page_data
>@@ -3615,17 +3615,17 @@ initial_for_parallel()
> }
>
> /*
>- * initial page_flag for each thread
>+ * initial page_flag for each producer thread
> */
>- if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads))
>+ if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_producers))
> == NULL) {
> MSG("Can't allocate memory for page_flag_buf. %s\n",
> strerror(errno));
> return FALSE;
> }
>- memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads);
>+ memset(info->page_flag_buf, 0, sizeof(void *) * info->num_producers);
>
>- for (i = 0; i < info->num_threads; i++) {
>+ for (i = 0; i < info->num_producers; i++) {
> if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) {
> MSG("Can't allocate memory for page_flag. %s\n",
> strerror(errno));
>@@ -3645,9 +3645,9 @@ initial_for_parallel()
> }
>
> /*
>- * initial fd_memory for threads
>+ * initial fd_memory for producer threads
> */
>- for (i = 0; i < info->num_threads; i++) {
>+ for (i = 0; i < info->num_producers; i++) {
> if ((FD_MEMORY_PARALLEL(i) = open(info->name_memory, O_RDONLY))
> < 0) {
> ERRMSG("Can't open the dump memory(%s). %s\n",
>@@ -3673,7 +3673,7 @@ free_for_parallel()
> struct page_flag *current;
>
> if (info->threads != NULL) {
>- for (i = 0; i < info->num_threads; i++) {
>+ for (i = 0; i < info->num_producers; i++) {
> if (info->threads[i] != NULL)
> free(info->threads[i]);
>
>@@ -3714,7 +3714,7 @@ free_for_parallel()
> }
>
> if (info->page_flag_buf != NULL) {
>- for (i = 0; i < info->num_threads; i++) {
>+ for (i = 0; i < info->num_producers; i++) {
> for (j = 0; j < PAGE_FLAG_NUM; j++) {
> if (info->page_flag_buf[i] != NULL) {
> current = info->page_flag_buf[i];
>@@ -3729,7 +3729,7 @@ free_for_parallel()
> if (info->parallel_info == NULL)
> return;
>
>- for (i = 0; i < info->num_threads; i++) {
>+ for (i = 0; i < info->num_producers; i++) {
> if (FD_MEMORY_PARALLEL(i) > 0)
> close(FD_MEMORY_PARALLEL(i));
>
>@@ -3936,7 +3936,7 @@ out:
> DEBUG_MSG("Buffer size for the cyclic mode: %ld\n", info->bufsize_cyclic);
> }
>
>- if (info->num_threads) {
>+ if (info->num_producers) {
> if (is_xen_memory()) {
> MSG("'--num-threads' option is disable,\n");
> MSG("because %s is Xen's memory core image.\n",
>@@ -4066,9 +4066,9 @@ initialize_2nd_bitmap(struct dump_bitmap *bitmap)
> }
>
> void
>-initialize_2nd_bitmap_parallel(struct dump_bitmap *bitmap, int thread_num)
>+initialize_2nd_bitmap_parallel(struct dump_bitmap *bitmap, int producer_num)
> {
>- bitmap->fd = FD_BITMAP_PARALLEL(thread_num);
>+ bitmap->fd = FD_BITMAP_PARALLEL(producer_num);
> bitmap->file_name = info->name_bitmap;
> bitmap->no_block = -1;
> memset(bitmap->buf, 0, BUFSIZE_BITMAP);
>@@ -7558,7 +7558,7 @@ kdump_thread_function_cyclic(void *arg) {
> volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf;
> struct cycle *cycle = kdump_thread_args->cycle;
> mdf_pfn_t pfn = cycle->start_pfn;
>- int index = kdump_thread_args->thread_num;
>+ int index = kdump_thread_args->producer_num;
> int buf_ready;
> int dumpable;
> int fd_memory = 0;
>@@ -7566,21 +7566,21 @@ kdump_thread_function_cyclic(void *arg) {
> struct dump_bitmap bitmap_memory_parallel = {0};
> unsigned char *buf = NULL, *buf_out = NULL;
> struct mmap_cache *mmap_cache =
>- MMAP_CACHE_PARALLEL(kdump_thread_args->thread_num);
>+ MMAP_CACHE_PARALLEL(kdump_thread_args->producer_num);
> unsigned long size_out;
>- z_stream *stream = &ZLIB_STREAM_PARALLEL(kdump_thread_args->thread_num);
>+ z_stream *stream = &ZLIB_STREAM_PARALLEL(kdump_thread_args->producer_num);
> #ifdef USELZO
>- lzo_bytep wrkmem = WRKMEM_PARALLEL(kdump_thread_args->thread_num);
>+ lzo_bytep wrkmem = WRKMEM_PARALLEL(kdump_thread_args->producer_num);
> #endif
> #ifdef USESNAPPY
> unsigned long len_buf_out_snappy =
> snappy_max_compressed_length(info->page_size);
> #endif
>
>- buf = BUF_PARALLEL(kdump_thread_args->thread_num);
>- buf_out = BUF_OUT_PARALLEL(kdump_thread_args->thread_num);
>+ buf = BUF_PARALLEL(kdump_thread_args->producer_num);
>+ buf_out = BUF_OUT_PARALLEL(kdump_thread_args->producer_num);
>
>- fd_memory = FD_MEMORY_PARALLEL(kdump_thread_args->thread_num);
>+ fd_memory = FD_MEMORY_PARALLEL(kdump_thread_args->producer_num);
>
> if (info->fd_bitmap) {
> bitmap_parallel.buf = malloc(BUFSIZE_BITMAP);
>@@ -7590,7 +7590,7 @@ kdump_thread_function_cyclic(void *arg) {
> goto fail;
> }
> initialize_2nd_bitmap_parallel(&bitmap_parallel,
>- kdump_thread_args->thread_num);
>+ kdump_thread_args->producer_num);
> }
>
> if (info->flag_refiltering) {
>@@ -7601,7 +7601,7 @@ kdump_thread_function_cyclic(void *arg) {
> goto fail;
> }
> initialize_bitmap_memory_parallel(&bitmap_memory_parallel,
>- kdump_thread_args->thread_num);
>+ kdump_thread_args->producer_num);
> }
>
> /*
>@@ -7802,8 +7802,8 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
> for (i = 0; i < page_buf_num; i++)
> page_data_buf[i].used = FALSE;
>
>- for (i = 0; i < info->num_threads; i++) {
>- kdump_thread_args[i].thread_num = i;
>+ for (i = 0; i < info->num_producers; i++) {
>+ kdump_thread_args[i].producer_num = i;
> kdump_thread_args[i].len_buf_out = len_buf_out;
> kdump_thread_args[i].page_data_buf = page_data_buf;
> kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i];
>@@ -7843,13 +7843,13 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
> * consuming is used for recording in which thread the pfn is the smallest.
> * current_pfn is used for recording the value of pfn when checking the pfn.
> */
>- for (i = 0; i < info->num_threads; i++) {
>+ for (i = 0; i < info->num_producers; i++) {
> if (info->page_flag_buf[i]->ready == FLAG_UNUSED)
> continue;
> temp_pfn = info->page_flag_buf[i]->pfn;
>
> /*
>- * count how many threads have reached the end.
>+ * count how many producer threads have reached the end.
> */
> if (temp_pfn >= end_pfn) {
> info->page_flag_buf[i]->ready = FLAG_UNUSED;
>@@ -7865,9 +7865,9 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
> }
>
> /*
>- * If all the threads have reached the end, we will finish writing.
>+ * If all producer threads have reached the end, we will finish writing.
> */
>- if (end_count >= info->num_threads)
>+ if (end_count >= info->num_producers)
> goto finish;
>
> /*
>@@ -7930,7 +7930,7 @@ finish:
>
> out:
> if (threads != NULL) {
>- for (i = 0; i < info->num_threads; i++) {
>+ for (i = 0; i < info->num_producers; i++) {
> if (threads[i] != NULL) {
> res = pthread_cancel(*threads[i]);
> if (res != 0 && res != ESRCH)
>@@ -7939,7 +7939,7 @@ out:
> }
> }
>
>- for (i = 0; i < info->num_threads; i++) {
>+ for (i = 0; i < info->num_producers; i++) {
> if (threads[i] != NULL) {
> res = pthread_join(*threads[i], &thread_result);
> if (res != 0)
>@@ -8553,7 +8553,7 @@ write_kdump_pages_and_bitmap_cyclic(struct cache_data *cd_header, struct cache_d
> if (!write_kdump_bitmap2(&cycle))
> return FALSE;
>
>- if (info->num_threads) {
>+ if (info->num_producers) {
> if (!write_kdump_pages_parallel_cyclic(cd_header,
> cd_page, &pd_zero,
> &offset_data, &cycle))
>@@ -10531,7 +10531,7 @@ check_param_for_creating_dumpfile(int argc, char *argv[])
> if (info->flag_sadump_diskset && !sadump_is_supported_arch())
> return FALSE;
>
>- if (info->num_threads) {
>+ if (info->num_producers) {
> if (info->flag_split) {
> MSG("--num-threads cannot used with --split.\n");
> return FALSE;
>@@ -10607,16 +10607,16 @@ check_param_for_creating_dumpfile(int argc, char *argv[])
> } else
> return FALSE;
>
>- if (info->num_threads) {
>+ if (info->num_producers) {
> if ((info->parallel_info =
>- malloc(sizeof(parallel_info_t) * info->num_threads))
>+ malloc(sizeof(parallel_info_t) * info->num_producers))
> == NULL) {
> MSG("Can't allocate memory for parallel_info.\n");
> return FALSE;
> }
>
> memset(info->parallel_info, 0, sizeof(parallel_info_t)
>- * info->num_threads);
>+ * info->num_producers);
> }
>
> return TRUE;
>@@ -10721,20 +10721,20 @@ calculate_cyclic_buffer_size(void) {
> limit_size = get_free_memory_size() * 0.6;
>
> /*
>- * Recalculate the limit_size according to num_threads.
>- * And reset num_threads if there is not enough memory.
>+ * Recalculate the limit_size according to num_producers.
>+ * And reset --num-threads if there is not enough memory.
> */
>- if (info->num_threads > 0) {
>+ if (info->num_producers > 0) {
> if (limit_size <= maximum_size) {
> MSG("There isn't enough memory for multi-threads.\n");
>- info->num_threads = 0;
>+ info->num_producers = 0;
> }
>- else if ((limit_size - maximum_size) / info->num_threads < THREAD_REGION) {
>- MSG("There isn't enough memory for %d threads.\n", info->num_threads);
>- info->num_threads = (limit_size - maximum_size) / THREAD_REGION;
>- MSG("--num_threads is set to %d.\n", info->num_threads);
>+ else if ((limit_size - maximum_size) / info->num_producers < THREAD_REGION) {
>+ MSG("There isn't enough memory for %d threads.\n", info->num_producers + 1);
>+ info->num_producers = (limit_size - maximum_size) / THREAD_REGION;
>+ MSG("--num-threads is set to %d.\n", info->num_producers + 1);
>
>- limit_size = limit_size - THREAD_REGION * info->num_threads;
>+ limit_size = limit_size - THREAD_REGION * info->num_producers;
> }
> }
>
>@@ -11103,7 +11103,7 @@ main(int argc, char *argv[])
> info->working_dir = optarg;
> break;
> case OPT_NUM_THREADS:
>- info->num_threads = MAX(atoi(optarg), 0);
>+ info->num_producers = MAX(atoi(optarg), 1) - 1;
> break;
> case '?':
> MSG("Commandline parameter is invalid.\n");
>diff --git a/makedumpfile.h b/makedumpfile.h
>index 533e5b8..6a0f51e 100644
>--- a/makedumpfile.h
>+++ b/makedumpfile.h
>@@ -1004,7 +1004,7 @@ struct page_data
> };
>
> struct thread_args {
>- int thread_num;
>+ int producer_num;
> unsigned long len_buf_out;
> struct cycle *cycle;
> struct page_data *page_data_buf;
>@@ -1294,7 +1294,7 @@ struct DumpInfo {
> /*
> * for parallel process
> */
>- int num_threads;
>+ int num_producers;
> int num_buffers;
> pthread_t **threads;
> struct thread_args *kdump_thread_args;
>--
>1.8.3.1
>
>
More information about the kexec
mailing list