--- makedumpfile.ori 2015-06-28 23:32:36.187000000 -0400 +++ makedumpfile.c 2015-06-29 01:51:55.884000000 -0400 @@ -26,6 +26,46 @@ #include #include +unsigned long write_kdump_pages_parallel_cyclic_time = 0; +struct timeval write_kdump_pages_parallel_cyclic1, write_kdump_pages_parallel_cyclic2; +unsigned long loop_time = 0; +unsigned long consume_time = 0; +struct timeval loop1, loop2; +unsigned long check_time = 0; +struct timeval check1, check2; +struct timeval write1, write2; +unsigned long write_time; +struct timeval lock1, lock2; +unsigned long lock_time; +struct timeval hit1, hit2; +unsigned long hit_time; +struct timeval find1, find2; +unsigned long find_time; +struct timeval timea1, timea2; +unsigned long timea; +struct timeval timeb1, timeb2; +unsigned long read_time[4]; +struct timeval read1[4], read2[4]; +unsigned long lock_current_time[4]; +struct timeval lock_current1[4], lock_current2[4]; +unsigned long found_time[4]; +struct timeval found1[4], found2[4]; +unsigned long lock_consumed_time[4]; +struct timeval lock_consumed1[4], lock_consumed2[4]; +unsigned long compress_time[4]; +struct timeval compress_time1[4], compress_time2[4]; +unsigned long timeb; +unsigned long count1 = 0; +unsigned long count2 = 0; +unsigned long count3 = 0; +unsigned long count4 = 0; +unsigned long count5 = 0; +unsigned long count6 = 0; +unsigned long count7 = 0; +unsigned long count8 = 0; +unsigned long count9 = 0; + + struct symbol_table symbol_table; struct size_table size_table; struct offset_table offset_table; @@ -6944,6 +6984,7 @@ unsigned long len_buf_out_snappy = snappy_max_compressed_length(info->page_size); #endif +int thread_num = kdump_thread_args->thread_num; buf = BUF_PARALLEL(kdump_thread_args->thread_num); buf_out = BUF_OUT_PARALLEL(kdump_thread_args->thread_num); @@ -6964,17 +7005,20 @@ while (1) { /* get next pfn */ +gettimeofday(&lock_current1[thread_num], NULL); pthread_mutex_lock(&info->current_pfn_mutex); pfn = info->current_pfn; info->current_pfn++; pthread_mutex_unlock(&info->current_pfn_mutex); - +gettimeofday(&lock_current2[thread_num], NULL); +lock_current_time[thread_num] += (lock_current2[thread_num].tv_sec - lock_current1[thread_num].tv_sec) * 1000000 + (lock_current2[thread_num].tv_usec - lock_current1[thread_num].tv_usec); if (pfn >= kdump_thread_args->end_pfn) break; index = -1; found = FALSE; +gettimeofday(&found1[thread_num], NULL); while (found == FALSE) { /* * need a cancellation point here @@ -6983,15 +7027,23 @@ index = pfn % page_data_num; - if (page_data_buf[index].ready != 0) + if (page_data_buf[index].ready != 0){ continue; +} - if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0) + if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0){ continue; +} - if (page_data_buf[index].ready != 0) + if (page_data_buf[index].ready != 0){ +count1++; goto unlock; +} +gettimeofday(&found2[thread_num], NULL); +found_time[thread_num] += (found2[thread_num].tv_sec - found1[thread_num].tv_sec) * 1000000 + (found2[thread_num].tv_usec - found1[thread_num].tv_usec); +gettimeofday(&found1[thread_num], NULL); +gettimeofday(&lock_consumed1[thread_num], NULL); pthread_mutex_lock(&info->consumed_pfn_mutex); if ((long)page_data_buf[index].pfn > (long)info->consumed_pfn) @@ -6999,11 +7051,15 @@ consumed_pfn = info->consumed_pfn; pthread_mutex_unlock(&info->consumed_pfn_mutex); +gettimeofday(&lock_consumed2[thread_num], NULL); +lock_consumed_time[thread_num] += (lock_consumed2[thread_num].tv_sec - lock_consumed1[thread_num].tv_sec) * 1000000 + (lock_consumed2[thread_num].tv_usec - lock_consumed1[thread_num].tv_usec); /* * leave space for slow producer */ - if ((long)pfn - (long)consumed_pfn > page_data_num) + if ((long)pfn - (long)consumed_pfn > page_data_num){ +count2++; goto unlock; +} found = TRUE; @@ -7025,11 +7081,16 @@ page_data_buf[index].dumpable = TRUE; +gettimeofday(&read1[thread_num], NULL); if (!read_pfn_parallel(fd_memory, pfn, buf, &bitmap_memory_parallel, mmap_cache)) goto fail; +gettimeofday(&read2[thread_num], NULL); +read_time[thread_num] += (read2[thread_num].tv_sec - read1[thread_num].tv_sec) * 1000000 + (read2[thread_num].tv_usec - read1[thread_num].tv_usec); + +gettimeofday(&compress_time1[thread_num], NULL); filter_data_buffer_parallel(buf, pfn_to_paddr(pfn), info->page_size, &info->filter_mutex); @@ -7090,6 +7151,8 @@ page_data_buf[index].size = info->page_size; memcpy(page_data_buf[index].buf, buf, info->page_size); } +gettimeofday(&compress_time2[thread_num], NULL); +compress_time[thread_num] += (compress_time2[thread_num].tv_sec - compress_time1[thread_num].tv_sec) * 1000000 + (compress_time2[thread_num].tv_usec - compress_time1[thread_num].tv_usec); unlock: pthread_mutex_unlock(&page_data_buf[index].mutex); @@ -7220,6 +7283,7 @@ gettimeofday(&last, NULL); while (consuming_pfn < end_pfn) { +gettimeofday(&loop1, NULL); index = consuming_pfn % page_data_num; gettimeofday(&new, NULL); @@ -7232,17 +7296,26 @@ * check pfn first without mutex locked to reduce the time * trying to lock the mutex */ - if (page_data_buf[index].pfn != consuming_pfn) + if (page_data_buf[index].pfn != consuming_pfn){ +gettimeofday(&loop2, NULL); +loop_time += (loop2.tv_sec - loop1.tv_sec) * 1000000 + (loop2.tv_usec - loop1.tv_usec); continue; +} +gettimeofday(&find1, NULL); +gettimeofday(&lock1, NULL); pthread_mutex_lock(&page_data_buf[index].mutex); +gettimeofday(&lock2, NULL); +lock_time += (lock2.tv_sec - lock1.tv_sec) * 1000000 + (lock2.tv_usec - lock1.tv_usec); + /* check whether the found one is ready to be consumed */ if (page_data_buf[index].pfn != consuming_pfn || page_data_buf[index].ready != 1) { goto unlock; } +gettimeofday(&hit1, NULL); if ((num_dumped % per) == 0) print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable); @@ -7256,6 +7329,7 @@ num_dumped++; +gettimeofday(&write1, NULL); if (page_data_buf[index].zero == TRUE) { if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) goto out; @@ -7278,8 +7352,17 @@ goto out; } + +gettimeofday(&write2, NULL); +write_time = (write2.tv_sec - write1.tv_sec) * 1000000 + (write2.tv_usec - write1.tv_usec); + unlock: pthread_mutex_unlock(&page_data_buf[index].mutex); +gettimeofday(&hit2, NULL); +gettimeofday(&find2, NULL); +hit_time += (hit2.tv_sec - hit1.tv_sec) * 1000000 + (hit2.tv_usec - hit1.tv_usec); +find_time += (find2.tv_sec - find1.tv_sec) * 1000000 + (find2.tv_usec - find1.tv_usec); + } ret = TRUE; @@ -10314,6 +10397,11 @@ int main(int argc, char *argv[]) { + unsigned long total_time = 0; + struct timeval start_time, end_time; + gettimeofday(&start_time, NULL); + + int i, opt, flag_debug = FALSE; if ((info = calloc(1, sizeof(struct DumpInfo))) == NULL) { @@ -10328,7 +10416,6 @@ goto out; } initialize_tables(); - /* * By default, makedumpfile assumes that multi-cycle processing is * necessary to work in constant memory space. @@ -10642,5 +10729,32 @@ } free_elf_info(); + gettimeofday(&end_time, NULL); + total_time = (end_time.tv_sec - start_time.tv_sec) * 1000000 + (end_time.tv_usec - start_time.tv_usec); + MSG("lock time: %lds%ldus\n", lock_time / 1000000, lock_time % 1000000); + MSG("write time: %lds%ldus\n", write_time / 1000000, write_time % 1000000); + MSG("hit time: %lds%ldus\n", hit_time / 1000000, hit_time % 1000000); + MSG("find time: %lds%ldus\n", find_time / 1000000, find_time % 1000000); + MSG("loop_time: %lds%ldus\n", loop_time / 1000000, loop_time % 1000000); + MSG("thread consume_time: %lds%ldus\n", consume_time / 1000000, consume_time % 1000000); + MSG("thread timea: %lds%ldus\n", timea / 1000000, timea % 1000000); + MSG("thread timeb: %lds%ldus\n", timeb / 1000000, timeb % 1000000); +for (i = 0; i < 4; i++){ + MSG("read_time[%d]: %lds%ldus\n", i, read_time[i] / 1000000, read_time[i] % 1000000); + MSG("lock_current_time[%d]: %lds%ldus\n", i, lock_current_time[i] / 1000000, lock_current_time[i] % 1000000); + MSG("found_time[%d]: %lds%ldus\n", i, found_time[i] / 1000000, found_time[i] % 1000000); + MSG("lock_consumed_time[%d]: %lds%ldus\n", i, lock_consumed_time[i] / 1000000, lock_consumed_time[i] % 1000000); + MSG("compress_time[%d]: %lds%ldus\n", i, compress_time[i] / 1000000, compress_time[i] % 1000000); +} + MSG("count1: %ld\n", count1); + MSG("count2: %ld\n", count2); + MSG("count3: %ld\n", count3); + MSG("count4: %ld\n", count4); + MSG("count4: %ld\n", count5); + MSG("count4: %ld\n", count6); + MSG("count4: %ld\n", count7); + MSG("exec time: %lds%ldus\n", total_time / 1000000, total_time % 1000000); + + return retcd; }