[PATCH v3 00/41] Optimize KVM/ARM for VHE systems

Tomasz Nowicki tn at semihalf.com
Fri Feb 2 02:05:40 PST 2018


On 01.02.2018 17:15, Yury Norov wrote:
> On Thu, Feb 01, 2018 at 02:57:59PM +0100, Tomasz Nowicki wrote:
>> Hi Christoffer,
>>
>> I created a simple module for the VM kernel. It spins on the PSCI version
>> hypercall to measure the base exit cost, as you suggested. I also measured
>> the CPU cycles for each loop; here are my results:
>>
>> My setup:
>> 1-socket ThunderX2 running VM - 1VCPU
>>
>> Tested baselines:
>> a) host kernel v4.15-rc3 and VM kernel v4.15-rc3
>> b) host kernel v4.15-rc3 + vhe-optimize-v3-with-fixes and VM kernel
>> v4.15-rc3
>>
>> The module was loaded in the VM, and the results are presented in [%]
>> relative to the average CPU cycles spent on the PSCI version hypercall with
>> the vanilla VHE host kernel v4.15-rc3:
>>
>>               VHE  |  nVHE
>> =========================
>> baseline a)  100% |  130%
>> =========================
>> baseline b)  36%  |  123%
>>
>> So I can confirm a significant performance improvement, especially for the
>> VHE case. Additionally, I ran network throughput tests with vhost-net, but
>> saw no difference in that case.
> 
> Hi Tomasz,
> 
> Can you share your test?
> 

Yes:

#include <linux/arm-smccc.h>
#include <linux/cpumask.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/psci.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#include <uapi/linux/psci.h>

/*
 * A timed sample is taken on every loop iteration whose index is a multiple
 * of SAMPLE_N; the iterations in between only issue the hypercall.
 */
#define SAMPLE_N	(10000UL)
/* Number of samples printed before the benchmark thread exits on its own. */
#define SAMPLES		(500)
/* CPU number handed to kthread_bind() for the benchmark thread. */
#define CPU_PINNED	(10)

/*
 * The benchmark kthread created by kvm_bench_init(). The thread resets this
 * to NULL right before it exits on its own, which makes kvm_bench_cleanup()
 * skip kthread_stop().
 */
static struct task_struct *kvm_bench_task;

static unsigned long __invoke_psci_fn_hvc(unsigned long function_id,
			unsigned long arg0, unsigned long arg1,
			unsigned long arg2)
{
	struct arm_smccc_res res;

	arm_smccc_hvc(function_id, arg0, arg1, arg2, 0, 0, 0, 0, &res);
	return res.a0;
}

static u32 psci_get_version(void)
{
	return __invoke_psci_fn_hvc(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0);
}

/*
 * Read the CNTVCT_EL0 system register and return its value.
 *
 * NOTE(review): despite the function name, CNTVCT_EL0 is the architected
 * virtual counter; it normally ticks at the generic timer frequency, not at
 * the CPU clock. Confirm that "cycles" is the intended unit for the numbers
 * printed by the caller.
 */
static inline u64 get_cycles_custom(void)
{
	u64 c;

	/*
	 * Counter reads may be performed speculatively and out of program
	 * order. The ISB makes sure all earlier instructions have completed
	 * before the counter is sampled; the "memory" clobber stops the
	 * compiler from moving memory accesses across the read.
	 */
	__asm__ volatile("isb\n\t"
			 "mrs %0, cntvct_el0"
			 : "=r"(c)
			 :
			 : "memory");
	return c;
}

static int kvm_bench_kthread(void *none)
{
	int test_iter, out = SAMPLES;
	u64 time_before, time;
         u32 ver = psci_get_version();

	printk(KERN_INFO "Starting kvm exit cost test, using PSCI get version 
hypercall");
	printk(KERN_INFO "Obtained PSCIv%d.%d\n", PSCI_VERSION_MAJOR(ver),
	       PSCI_VERSION_MINOR(ver));

	for (test_iter = 0;; test_iter++) {
		if (!(test_iter % SAMPLE_N)) {
			time_before = get_cycles_custom();
		}

		psci_get_version();

		if (!(test_iter % SAMPLE_N)) {
			while (!out--) {
				kvm_bench_task = NULL;
				do_exit(0);
			}
			time = get_cycles_custom() - time_before;
			printk(KERN_INFO "iter takes %llu cycles. \n", time);
			if (kthread_should_stop())
				break;
			schedule();
		}
	}

	return 0;
}

/*
 * Module entry point: create the benchmark kthread, pin it to CPU_PINNED
 * when that CPU is available, and start it.
 *
 * Returns 0 on success or the error encoded in the pointer returned by
 * kthread_create() on failure.
 */
static int __init kvm_bench_init(void)
{
	printk(KERN_INFO "KVM exit cost benchmark\n");

	kvm_bench_task = kthread_create(kvm_bench_kthread, NULL, "kvm_test");
	if (IS_ERR(kvm_bench_task)) {
		int err = PTR_ERR(kvm_bench_task);

		printk(KERN_ERR "Unable to start thread.\n");
		/* Do not leave an error-encoded pointer in the global. */
		kvm_bench_task = NULL;
		return err;
	}

	/*
	 * CPU_PINNED is a hard-coded CPU number; a guest with fewer VCPUs
	 * does not have it. Only bind when the CPU exists and is online,
	 * otherwise let the scheduler place the thread.
	 */
	if (CPU_PINNED < nr_cpu_ids && cpu_online(CPU_PINNED))
		kthread_bind(kvm_bench_task, CPU_PINNED);
	else
		printk(KERN_WARNING "CPU %d is not online, benchmark thread is not pinned\n",
		       CPU_PINNED);

	wake_up_process(kvm_bench_task);
	return 0;
}

/*
 * Module exit point: log the teardown and, if the benchmark thread is still
 * registered in kvm_bench_task, stop it with kthread_stop().
 */
static void __exit kvm_bench_cleanup(void)
{
	struct task_struct *worker;

	printk(KERN_INFO "KVM benchmark cleaning up\n");

	worker = kvm_bench_task;
	if (!worker)
		return;

	kthread_stop(worker);
}

module_init(kvm_bench_init);
module_exit(kvm_bench_cleanup);


Thanks,
Tomasz



More information about the linux-arm-kernel mailing list