[PATCH v2 03/12] KVM: x86: Expose TSC offset controls to userspace
Paolo Bonzini
pbonzini at redhat.com
Sun Jul 18 13:42:43 PDT 2021
On 16/07/21 23:26, Oliver Upton wrote:
> To date, VMM-directed TSC synchronization and migration has been a bit
> messy. KVM has some baked-in heuristics around TSC writes to infer if
> the VMM is attempting to synchronize. This is problematic, as it depends
> on host userspace writing to the guest's TSC within 1 second of the last
> write.
>
> A much cleaner approach to configuring the guest's views of the TSC is to
> simply migrate the TSC offset for every vCPU. Offsets are idempotent,
> and thus not subject to change depending on when the VMM actually
> reads/writes values from/to KVM. The VMM can then read the TSC once with
> KVM_GET_CLOCK to capture a (realtime, host_tsc) pair at the instant when
> the guest is paused.
>
> Cc: David Matlack <dmatlack at google.com>
> Signed-off-by: Oliver Upton <oupton at google.com>
> ---
> arch/x86/include/asm/kvm_host.h | 1 +
> arch/x86/include/uapi/asm/kvm.h | 4 +
> arch/x86/kvm/x86.c | 166 ++++++++++++++++++++++++++++++++
> 3 files changed, 171 insertions(+)
This is missing documentation. The documentation should also include
the algorithm in https://www.spinics.net/lists/kvm-arm/msg47383.html
(modulo the fact that KVM_GET/SET_CLOCK return or pass realtime_ns
rather than kvmclock_ns - realtime_ns; which is fine of course).
Paolo
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index e527d7259415..45134b7b14d6 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1070,6 +1070,7 @@ struct kvm_arch {
> u64 last_tsc_nsec;
> u64 last_tsc_write;
> u32 last_tsc_khz;
> + u64 last_tsc_offset;
> u64 cur_tsc_nsec;
> u64 cur_tsc_write;
> u64 cur_tsc_offset;
> diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
> index a6c327f8ad9e..0b22e1e84e78 100644
> --- a/arch/x86/include/uapi/asm/kvm.h
> +++ b/arch/x86/include/uapi/asm/kvm.h
> @@ -503,4 +503,8 @@ struct kvm_pmu_event_filter {
> #define KVM_PMU_EVENT_ALLOW 0
> #define KVM_PMU_EVENT_DENY 1
>
> +/* for KVM_{GET,SET,HAS}_DEVICE_ATTR */
> +#define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */
> +#define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */
> +
> #endif /* _ASM_X86_KVM_H */
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index e1b7c8b67428..d22de0a1988a 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2411,6 +2411,11 @@ static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 l1_offset)
> static_call(kvm_x86_write_tsc_offset)(vcpu, vcpu->arch.tsc_offset);
> }
>
> +/* Return the value of vcpu->arch.l1_tsc_offset for @vcpu. */
> +static u64 kvm_vcpu_read_tsc_offset(struct kvm_vcpu *vcpu)
> +{
> +	const u64 l1_offset = vcpu->arch.l1_tsc_offset;
> +
> +	return l1_offset;
> +}
> +
> static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multiplier)
> {
> vcpu->arch.l1_tsc_scaling_ratio = l1_multiplier;
> @@ -2467,6 +2472,7 @@ static void __kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 offset, u64 tsc,
> kvm->arch.last_tsc_nsec = ns;
> kvm->arch.last_tsc_write = tsc;
> kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
> + kvm->arch.last_tsc_offset = offset;
>
> vcpu->arch.last_guest_tsc = tsc;
>
> @@ -4914,6 +4920,136 @@ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
> return 0;
> }
>
> +/*
> + * Report whether attr->attr names a supported TSC control attribute.
> + *
> + * Returns 0 for KVM_VCPU_TSC_OFFSET and -ENXIO for any other attribute.
> + */
> +static int kvm_arch_tsc_has_attr(struct kvm_vcpu *vcpu,
> +				 struct kvm_device_attr *attr)
> +{
> +	if (attr->attr == KVM_VCPU_TSC_OFFSET)
> +		return 0;
> +
> +	return -ENXIO;
> +}
> +
> +/*
> + * Read a TSC control attribute of @vcpu and copy it to the user buffer at
> + * attr->addr.
> + *
> + * Returns 0 on success, -EFAULT if the copy to userspace fails, or -ENXIO
> + * if attr->attr is not a known TSC attribute.
> + */
> +static int kvm_arch_tsc_get_attr(struct kvm_vcpu *vcpu,
> +				 struct kvm_device_attr *attr)
> +{
> +	void __user *uaddr = (void __user *)attr->addr;
> +	int r;
> +
> +	switch (attr->attr) {
> +	case KVM_VCPU_TSC_OFFSET: {
> +		u64 offset;
> +
> +		offset = kvm_vcpu_read_tsc_offset(vcpu);
> +		r = -EFAULT;
> +		if (copy_to_user(uaddr, &offset, sizeof(offset)))
> +			break;
> +
> +		r = 0;
> +		/*
> +		 * Must not fall through: the default case would overwrite
> +		 * the success value with -ENXIO.
> +		 */
> +		break;
> +	}
> +	default:
> +		r = -ENXIO;
> +	}
> +
> +	return r;
> +}
> +
> +/*
> + * Set a TSC control attribute of @vcpu from the user buffer at attr->addr.
> + *
> + * Returns 0 on success, -EFAULT if the value cannot be copied from
> + * userspace, or -ENXIO if attr->attr is not KVM_VCPU_TSC_OFFSET.
> + */
> +static int kvm_arch_tsc_set_attr(struct kvm_vcpu *vcpu,
> +				 struct kvm_device_attr *attr)
> +{
> +	void __user *uaddr = (void __user *)attr->addr;
> +	struct kvm *kvm = vcpu->kvm;
> +	u64 new_offset, tsc_now, now_ns;
> +	unsigned long irq_flags;
> +	bool matched;
> +
> +	if (attr->attr != KVM_VCPU_TSC_OFFSET)
> +		return -ENXIO;
> +
> +	if (copy_from_user(&new_offset, uaddr, sizeof(new_offset)))
> +		return -EFAULT;
> +
> +	raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, irq_flags);
> +
> +	/*
> +	 * "Matched" requires a nonzero vCPU TSC frequency equal to
> +	 * kvm->arch.last_tsc_khz and an offset equal to
> +	 * kvm->arch.last_tsc_offset.
> +	 */
> +	matched = vcpu->arch.virtual_tsc_khz &&
> +		  kvm->arch.last_tsc_khz == vcpu->arch.virtual_tsc_khz &&
> +		  kvm->arch.last_tsc_offset == new_offset;
> +
> +	/* Scaled host TSC reading plus the requested offset. */
> +	tsc_now = kvm_scale_tsc(vcpu, rdtsc(), vcpu->arch.l1_tsc_scaling_ratio) + new_offset;
> +	now_ns = get_kvmclock_base_ns();
> +
> +	__kvm_synchronize_tsc(vcpu, new_offset, tsc_now, now_ns, matched);
> +	raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, irq_flags);
> +
> +	return 0;
> +}
> +
> +/*
> + * Dispatch a KVM_HAS_DEVICE_ATTR query on @vcpu by attribute group.
> + *
> + * Returns the group handler's result, or -ENXIO for an unknown group.
> + */
> +static int kvm_vcpu_ioctl_has_device_attr(struct kvm_vcpu *vcpu,
> +					  struct kvm_device_attr *attr)
> +{
> +	if (attr->group == KVM_VCPU_TSC_CTRL)
> +		return kvm_arch_tsc_has_attr(vcpu, attr);
> +
> +	return -ENXIO;
> +}
> +
> +/*
> + * Dispatch a KVM_GET_DEVICE_ATTR request on @vcpu by attribute group.
> + *
> + * Returns the group handler's result, or -ENXIO for an unknown group.
> + */
> +static int kvm_vcpu_ioctl_get_device_attr(struct kvm_vcpu *vcpu,
> +					  struct kvm_device_attr *attr)
> +{
> +	if (attr->group == KVM_VCPU_TSC_CTRL)
> +		return kvm_arch_tsc_get_attr(vcpu, attr);
> +
> +	return -ENXIO;
> +}
> +
> +/*
> + * Dispatch a KVM_SET_DEVICE_ATTR request on @vcpu by attribute group.
> + *
> + * Returns the group handler's result, or -ENXIO for an unknown group.
> + */
> +static int kvm_vcpu_ioctl_set_device_attr(struct kvm_vcpu *vcpu,
> +					  struct kvm_device_attr *attr)
> +{
> +	if (attr->group == KVM_VCPU_TSC_CTRL)
> +		return kvm_arch_tsc_set_attr(vcpu, attr);
> +
> +	return -ENXIO;
> +}
> +
> static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
> struct kvm_enable_cap *cap)
> {
> @@ -5368,6 +5504,36 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
> r = __set_sregs2(vcpu, u.sregs2);
> break;
> }
> + case KVM_HAS_DEVICE_ATTR: {
> + struct kvm_device_attr attr;
> +
> + r = -EFAULT;
> + if (copy_from_user(&attr, argp, sizeof(attr)))
> + goto out;
> +
> + r = kvm_vcpu_ioctl_has_device_attr(vcpu, &attr);
> + break;
> + }
> + case KVM_GET_DEVICE_ATTR: {
> + struct kvm_device_attr attr;
> +
> + r = -EFAULT;
> + if (copy_from_user(&attr, argp, sizeof(attr)))
> + goto out;
> +
> + r = kvm_vcpu_ioctl_get_device_attr(vcpu, &attr);
> + break;
> + }
> + case KVM_SET_DEVICE_ATTR: {
> + struct kvm_device_attr attr;
> +
> + r = -EFAULT;
> + if (copy_from_user(&attr, argp, sizeof(attr)))
> + goto out;
> +
> + r = kvm_vcpu_ioctl_set_device_attr(vcpu, &attr);
> + break;
> + }
> default:
> r = -EINVAL;
> }
>
More information about the linux-arm-kernel
mailing list