Files
linux/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
Sean Christopherson 62838fa5ea KVM: selftests: Relax assertion on HLT exits if CPU supports Idle HLT
If the CPU supports Idle HLT, which elides HLT VM-Exits if the vCPU has an
unmasked pending IRQ or NMI, relax the xAPIC IPI test's assertion on the
number of HLT exits to only require that the number of exits is less than
or equal to the number of HLT instructions that were executed.  I.e. don't
fail the test if Idle HLT does what it's supposed to do.

Note, unfortunately there's no way to determine if *KVM* supports Idle HLT,
as this_cpu_has() checks raw CPU support, and kvm_cpu_has() checks what can
be exposed to L1, i.e. the latter would check if KVM supports nested Idle
HLT.  But, since the assert is purely bonus coverage, checking for CPU
support is good enough.

Cc: Manali Shukla <Manali.Shukla@amd.com>
Tested-by: Manali Shukla <Manali.Shukla@amd.com>
Link: https://lore.kernel.org/r/20250226231809.3183093-1-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
2025-02-28 15:42:28 -08:00

501 lines
14 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* xapic_ipi_test
*
* Copyright (C) 2020, Google LLC.
*
* This work is licensed under the terms of the GNU GPL, version 2.
*
* Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake
* another vCPU that is halted when KVM's backing page for the APIC access
* address has been moved by mm.
*
* The test starts two vCPUs: one that sends IPIs and one that continually
* executes HLT. The sender checks that the halter has woken from the HLT and
* has reentered HLT before sending the next IPI. While the vCPUs are running,
* the host continually calls migrate_pages to move all of the process' pages
* amongst the available numa nodes on the machine.
*
* Migration is a command line option. When used on non-numa machines will
* exit with error. Test is still usefull on non-numa for testing IPIs.
*/
#include <getopt.h>
#include <pthread.h>
#include <inttypes.h>
#include <string.h>
#include <time.h>
#include "kvm_util.h"
#include "numaif.h"
#include "processor.h"
#include "test_util.h"
#include "vmx.h"
/* Default running time for the test */
#define DEFAULT_RUN_SECS 3
/* Default delay between migrate_pages calls (microseconds) */
#define DEFAULT_DELAY_USECS 500000
/*
* Vector for IPI from sender vCPU to halting vCPU.
* Value is arbitrary and was chosen for the alternating bit pattern. Any
* value should work.
*/
#define IPI_VECTOR 0xa5
/*
* Incremented in the IPI handler. Provides evidence to the sender that the IPI
* arrived at the destination
*/
static volatile uint64_t ipis_rcvd;
/* Data struct shared between host main thread and vCPUs */
struct test_data_page {
uint32_t halter_apic_id;
volatile uint64_t hlt_count;
volatile uint64_t wake_count;
uint64_t ipis_sent;
uint64_t migrations_attempted;
uint64_t migrations_completed;
uint32_t icr;
uint32_t icr2;
uint32_t halter_tpr;
uint32_t halter_ppr;
/*
* Record local version register as a cross-check that APIC access
* worked. Value should match what KVM reports (APIC_VERSION in
* arch/x86/kvm/lapic.c). If test is failing, check that values match
* to determine whether APIC access exits are working.
*/
uint32_t halter_lvr;
};
struct thread_params {
struct test_data_page *data;
struct kvm_vcpu *vcpu;
uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
};
void verify_apic_base_addr(void)
{
uint64_t msr = rdmsr(MSR_IA32_APICBASE);
uint64_t base = GET_APIC_BASE(msr);
GUEST_ASSERT(base == APIC_DEFAULT_GPA);
}
static void halter_guest_code(struct test_data_page *data)
{
verify_apic_base_addr();
xapic_enable();
data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
data->halter_lvr = xapic_read_reg(APIC_LVR);
/*
* Loop forever HLTing and recording halts & wakes. Disable interrupts
* each time around to minimize window between signaling the pending
* halt to the sender vCPU and executing the halt. No need to disable on
* first run as this vCPU executes first and the host waits for it to
* signal going into first halt before starting the sender vCPU. Record
* TPR and PPR for diagnostic purposes in case the test fails.
*/
for (;;) {
data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
data->hlt_count++;
asm volatile("sti; hlt; cli");
data->wake_count++;
}
}
/*
* Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to
* enable diagnosing errant writes to the APIC access address backing page in
* case of test failure.
*/
static void guest_ipi_handler(struct ex_regs *regs)
{
ipis_rcvd++;
xapic_write_reg(APIC_EOI, 77);
}
static void sender_guest_code(struct test_data_page *data)
{
uint64_t last_wake_count;
uint64_t last_hlt_count;
uint64_t last_ipis_rcvd_count;
uint32_t icr_val;
uint32_t icr2_val;
uint64_t tsc_start;
verify_apic_base_addr();
xapic_enable();
/*
* Init interrupt command register for sending IPIs
*
* Delivery mode=fixed, per SDM:
* "Delivers the interrupt specified in the vector field to the target
* processor."
*
* Destination mode=physical i.e. specify target by its local APIC
* ID. This vCPU assumes that the halter vCPU has already started and
* set data->halter_apic_id.
*/
icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR);
icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id);
data->icr = icr_val;
data->icr2 = icr2_val;
last_wake_count = data->wake_count;
last_hlt_count = data->hlt_count;
last_ipis_rcvd_count = ipis_rcvd;
for (;;) {
/*
* Send IPI to halter vCPU.
* First IPI can be sent unconditionally because halter vCPU
* starts earlier.
*/
xapic_write_reg(APIC_ICR2, icr2_val);
xapic_write_reg(APIC_ICR, icr_val);
data->ipis_sent++;
/*
* Wait up to ~1 sec for halter to indicate that it has:
* 1. Received the IPI
* 2. Woken up from the halt
* 3. Gone back into halt
* Current CPUs typically run at 2.x Ghz which is ~2
* billion ticks per second.
*/
tsc_start = rdtsc();
while (rdtsc() - tsc_start < 2000000000) {
if ((ipis_rcvd != last_ipis_rcvd_count) &&
(data->wake_count != last_wake_count) &&
(data->hlt_count != last_hlt_count))
break;
}
GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) &&
(data->wake_count != last_wake_count) &&
(data->hlt_count != last_hlt_count));
last_wake_count = data->wake_count;
last_hlt_count = data->hlt_count;
last_ipis_rcvd_count = ipis_rcvd;
}
}
static void *vcpu_thread(void *arg)
{
struct thread_params *params = (struct thread_params *)arg;
struct kvm_vcpu *vcpu = params->vcpu;
struct ucall uc;
int old;
int r;
r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
TEST_ASSERT(r == 0,
"pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
vcpu->id, r);
fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id);
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
if (get_ucall(vcpu, &uc) == UCALL_ABORT) {
TEST_ASSERT(false,
"vCPU %u exited with error: %s.\n"
"Sending vCPU sent %lu IPIs to halting vCPU\n"
"Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
"Halter TPR=%#x PPR=%#x LVR=%#x\n"
"Migrations attempted: %lu\n"
"Migrations completed: %lu",
vcpu->id, (const char *)uc.args[0],
params->data->ipis_sent, params->data->hlt_count,
params->data->wake_count,
*params->pipis_rcvd, params->data->halter_tpr,
params->data->halter_ppr, params->data->halter_lvr,
params->data->migrations_attempted,
params->data->migrations_completed);
}
return NULL;
}
static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
{
void *retval;
int r;
r = pthread_cancel(thread);
TEST_ASSERT(r == 0,
"pthread_cancel on vcpu_id=%d failed with errno=%d",
vcpu->id, r);
r = pthread_join(thread, &retval);
TEST_ASSERT(r == 0,
"pthread_join on vcpu_id=%d failed with errno=%d",
vcpu->id, r);
TEST_ASSERT(retval == PTHREAD_CANCELED,
"expected retval=%p, got %p", PTHREAD_CANCELED,
retval);
}
void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
uint64_t *pipis_rcvd)
{
long pages_not_moved;
unsigned long nodemask = 0;
unsigned long nodemasks[sizeof(nodemask) * 8];
int nodes = 0;
time_t start_time, last_update, now;
time_t interval_secs = 1;
int i, r;
int from, to;
unsigned long bit;
uint64_t hlt_count;
uint64_t wake_count;
uint64_t ipis_sent;
fprintf(stderr, "Calling migrate_pages every %d microseconds\n",
delay_usecs);
/* Get set of first 64 numa nodes available */
r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
0, MPOL_F_MEMS_ALLOWED);
TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);
fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
"(each 1-bit indicates node is present): %#lx\n",
sizeof(nodemask) * 8, nodemask);
/* Init array of masks containing a single-bit in each, one for each
* available node. migrate_pages called below requires specifying nodes
* as bit masks.
*/
for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) {
if (nodemask & bit) {
nodemasks[nodes] = nodemask & bit;
nodes++;
}
}
TEST_ASSERT(nodes > 1,
"Did not find at least 2 numa nodes. Can't do migration");
fprintf(stderr, "Migrating amongst %d nodes found\n", nodes);
from = 0;
to = 1;
start_time = time(NULL);
last_update = start_time;
ipis_sent = data->ipis_sent;
hlt_count = data->hlt_count;
wake_count = data->wake_count;
while ((int)(time(NULL) - start_time) < run_secs) {
data->migrations_attempted++;
/*
* migrate_pages with PID=0 will migrate all pages of this
* process between the nodes specified as bitmasks. The page
* backing the APIC access address belongs to this process
* because it is allocated by KVM in the context of the
* KVM_CREATE_VCPU ioctl. If that assumption ever changes this
* test may break or give a false positive signal.
*/
pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]),
&nodemasks[from],
&nodemasks[to]);
if (pages_not_moved < 0)
fprintf(stderr,
"migrate_pages failed, errno=%d\n", errno);
else if (pages_not_moved > 0)
fprintf(stderr,
"migrate_pages could not move %ld pages\n",
pages_not_moved);
else
data->migrations_completed++;
from = to;
to++;
if (to == nodes)
to = 0;
now = time(NULL);
if (((now - start_time) % interval_secs == 0) &&
(now != last_update)) {
last_update = now;
fprintf(stderr,
"%lu seconds: Migrations attempted=%lu completed=%lu, "
"IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n",
now - start_time, data->migrations_attempted,
data->migrations_completed,
data->ipis_sent, *pipis_rcvd,
data->hlt_count, data->wake_count);
TEST_ASSERT(ipis_sent != data->ipis_sent &&
hlt_count != data->hlt_count &&
wake_count != data->wake_count,
"IPI, HLT and wake count have not increased "
"in the last %lu seconds. "
"HLTer is likely hung.", interval_secs);
ipis_sent = data->ipis_sent;
hlt_count = data->hlt_count;
wake_count = data->wake_count;
}
usleep(delay_usecs);
}
}
void get_cmdline_args(int argc, char *argv[], int *run_secs,
bool *migrate, int *delay_usecs)
{
for (;;) {
int opt = getopt(argc, argv, "s:d:m");
if (opt == -1)
break;
switch (opt) {
case 's':
*run_secs = parse_size(optarg);
break;
case 'm':
*migrate = true;
break;
case 'd':
*delay_usecs = parse_size(optarg);
break;
default:
TEST_ASSERT(false,
"Usage: -s <runtime seconds>. Default is %d seconds.\n"
"-m adds calls to migrate_pages while vCPUs are running."
" Default is no migrations.\n"
"-d <delay microseconds> - delay between migrate_pages() calls."
" Default is %d microseconds.",
DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS);
}
}
}
int main(int argc, char *argv[])
{
int r;
int wait_secs;
const int max_halter_wait = 10;
int run_secs = 0;
int delay_usecs = 0;
struct test_data_page *data;
vm_vaddr_t test_data_page_vaddr;
bool migrate = false;
pthread_t threads[2];
struct thread_params params[2];
struct kvm_vm *vm;
uint64_t *pipis_rcvd;
get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs);
if (run_secs <= 0)
run_secs = DEFAULT_RUN_SECS;
if (delay_usecs <= 0)
delay_usecs = DEFAULT_DELAY_USECS;
vm = vm_create_with_one_vcpu(&params[0].vcpu, halter_guest_code);
vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code);
test_data_page_vaddr = vm_vaddr_alloc_page(vm);
data = addr_gva2hva(vm, test_data_page_vaddr);
memset(data, 0, sizeof(*data));
params[0].data = data;
params[1].data = data;
vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr);
vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr);
pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd);
params[0].pipis_rcvd = pipis_rcvd;
params[1].pipis_rcvd = pipis_rcvd;
/* Start halter vCPU thread and wait for it to execute first HLT. */
r = pthread_create(&threads[0], NULL, vcpu_thread, &params[0]);
TEST_ASSERT(r == 0,
"pthread_create halter failed errno=%d", errno);
fprintf(stderr, "Halter vCPU thread started\n");
wait_secs = 0;
while ((wait_secs < max_halter_wait) && !data->hlt_count) {
sleep(1);
wait_secs++;
}
TEST_ASSERT(data->hlt_count,
"Halter vCPU did not execute first HLT within %d seconds",
max_halter_wait);
fprintf(stderr,
"Halter vCPU thread reported its APIC ID: %u after %d seconds.\n",
data->halter_apic_id, wait_secs);
r = pthread_create(&threads[1], NULL, vcpu_thread, &params[1]);
TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno);
fprintf(stderr,
"IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n",
run_secs);
if (!migrate)
sleep(run_secs);
else
do_migrations(data, run_secs, delay_usecs, pipis_rcvd);
/*
* Cancel threads and wait for them to stop.
*/
cancel_join_vcpu_thread(threads[0], params[0].vcpu);
cancel_join_vcpu_thread(threads[1], params[1].vcpu);
/*
* If the host support Idle HLT, i.e. KVM *might* be using Idle HLT,
* then the number of HLT exits may be less than the number of HLTs
* that were executed, as Idle HLT elides the exit if the vCPU has an
* unmasked, pending IRQ (or NMI).
*/
if (this_cpu_has(X86_FEATURE_IDLE_HLT))
TEST_ASSERT(data->hlt_count >= vcpu_get_stat(params[0].vcpu, halt_exits),
"HLT insns = %lu, HLT exits = %lu",
data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits));
else
TEST_ASSERT_EQ(data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits));
fprintf(stderr,
"Test successful after running for %d seconds.\n"
"Sending vCPU sent %lu IPIs to halting vCPU\n"
"Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
"Halter APIC ID=%#x\n"
"Sender ICR value=%#x ICR2 value=%#x\n"
"Halter TPR=%#x PPR=%#x LVR=%#x\n"
"Migrations attempted: %lu\n"
"Migrations completed: %lu\n",
run_secs, data->ipis_sent,
data->hlt_count, data->wake_count, *pipis_rcvd,
data->halter_apic_id,
data->icr, data->icr2,
data->halter_tpr, data->halter_ppr, data->halter_lvr,
data->migrations_attempted, data->migrations_completed);
kvm_vm_free(vm);
return 0;
}