Merge tag 'perf-core-2023-02-20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Ingo Molnar:

 - Optimize perf_sample_data layout

 - Prepare sample data handling for BPF integration (see the sketch of
   the new perf_sample_save_*() helpers just before the diff)

 - Update the x86 PMU driver for Intel Meteor Lake

 - Restructure the x86 uncore code to fix an SPR (Sapphire Rapids)
   discovery breakage

 - Fix the x86 Zhaoxin PMU driver

 - Cleanups

* tag 'perf-core-2023-02-20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (27 commits)
  perf/x86/intel/uncore: Add Meteor Lake support
  x86/perf/zhaoxin: Add stepping check for ZXC
  perf/x86/intel/ds: Fix the conversion from TSC to perf time
  perf/x86/uncore: Don't WARN_ON_ONCE() for a broken discovery table
  perf/x86/uncore: Add a quirk for UPI on SPR
  perf/x86/uncore: Ignore broken units in discovery table
  perf/x86/uncore: Fix potential NULL pointer in uncore_get_alias_name
  perf/x86/uncore: Factor out uncore_device_to_die()
  perf/core: Call perf_prepare_sample() before running BPF
  perf/core: Introduce perf_prepare_header()
  perf/core: Do not pass header for sample ID init
  perf/core: Set data->sample_flags in perf_prepare_sample()
  perf/core: Add perf_sample_save_brstack() helper
  perf/core: Add perf_sample_save_raw_data() helper
  perf/core: Add perf_sample_save_callchain() helper
  perf/core: Save the dynamic parts of sample data size
  x86/kprobes: Use switch-case for 0xFF opcodes in prepare_emulation
  perf/core: Change the layout of perf_sample_data
  perf/x86/msr: Add Meteor Lake support
  perf/x86/cstate: Add Meteor Lake support
  ...
Committer: Linus Torvalds
Date:      2023-02-20 17:29:55 -08:00

25 changed files with 963 additions and 327 deletions


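Several commits in this series replace open-coded sample bookkeeping in PMU
drivers with new helpers (perf_sample_save_callchain(),
perf_sample_save_raw_data(), perf_sample_save_brstack()) that store the
payload, grow data->dyn_size and set the matching bit in data->sample_flags,
so that perf_prepare_sample() can skip work a driver already did. A minimal
sketch of the intended call pattern (hypothetical_pmu_overflow() and its
arguments are illustrative, not from any real driver):

	static void hypothetical_pmu_overflow(struct perf_event *event,
					      struct pt_regs *regs,
					      struct perf_branch_stack *brs)
	{
		struct perf_sample_data data;

		perf_sample_data_init(&data, 0, event->hw.last_period);

		/* Each helper saves its payload, bumps data.dyn_size and
		 * sets the matching PERF_SAMPLE_* bit in data.sample_flags. */
		perf_sample_save_callchain(&data, event, regs);
		perf_sample_save_brstack(&data, event, brs);

		perf_event_overflow(event, &data, regs);
	}

The kernel/events/core.c portion of the merge diff, where these flags are
consumed, follows.
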
@@ -7041,13 +7041,20 @@ out_put:
 	ring_buffer_put(rb);
 }
 
-static void __perf_event_header__init_id(struct perf_event_header *header,
-					 struct perf_sample_data *data,
+/*
+ * A set of common sample data types saved even for non-sample records
+ * when event->attr.sample_id_all is set.
+ */
+#define PERF_SAMPLE_ID_ALL  (PERF_SAMPLE_TID | PERF_SAMPLE_TIME |	\
+			     PERF_SAMPLE_ID | PERF_SAMPLE_STREAM_ID |	\
+			     PERF_SAMPLE_CPU | PERF_SAMPLE_IDENTIFIER)
+
+static void __perf_event_header__init_id(struct perf_sample_data *data,
 					 struct perf_event *event,
 					 u64 sample_type)
 {
 	data->type = event->attr.sample_type;
-	header->size += event->id_header_size;
+	data->sample_flags |= data->type & PERF_SAMPLE_ID_ALL;
 
 	if (sample_type & PERF_SAMPLE_TID) {
 		/* namespace issues */
@@ -7074,8 +7081,10 @@ void perf_event_header__init_id(struct perf_event_header *header,
 				struct perf_sample_data *data,
 				struct perf_event *event)
 {
-	if (event->attr.sample_id_all)
-		__perf_event_header__init_id(header, data, event, event->attr.sample_type);
+	if (event->attr.sample_id_all) {
+		header->size += event->id_header_size;
+		__perf_event_header__init_id(data, event, event->attr.sample_type);
+	}
 }
 
 static void __perf_event__output_id_sample(struct perf_output_handle *handle,
@@ -7305,7 +7314,7 @@ void perf_output_sample(struct perf_output_handle *handle,
 	}
 
 	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
-		if (data->sample_flags & PERF_SAMPLE_BRANCH_STACK) {
+		if (data->br_stack) {
 			size_t size;
 
 			size = data->br_stack->nr
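
Note the inversion in this hunk: before this series, the inner check used
data->sample_flags to tell whether the PMU had actually saved a branch
stack; perf_prepare_sample() now sets data->br_stack = NULL when nothing
was saved (see the PERF_SAMPLE_BRANCH_STACK block later in this diff), so
a plain pointer check suffices. The else branch just past this hunk keeps
emitting an empty stack, roughly:

	} else {
		/* we always store at least the value of nr */
		u64 nr = 0;

		perf_output_put(handle, nr);
	}
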
@@ -7549,83 +7558,68 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
 	return callchain ?: &__empty_callchain;
 }
 
-void perf_prepare_sample(struct perf_event_header *header,
-			 struct perf_sample_data *data,
+static __always_inline u64 __cond_set(u64 flags, u64 s, u64 d)
+{
+	return d * !!(flags & s);
+}
+
+void perf_prepare_sample(struct perf_sample_data *data,
 			 struct perf_event *event,
 			 struct pt_regs *regs)
 {
 	u64 sample_type = event->attr.sample_type;
 	u64 filtered_sample_type;
 
-	header->type = PERF_RECORD_SAMPLE;
-	header->size = sizeof(*header) + event->header_size;
-
-	header->misc = 0;
-	header->misc |= perf_misc_flags(regs);
-
 	/*
-	 * Clear the sample flags that have already been done by the
-	 * PMU driver.
+	 * Add the sample flags that are dependent to others.  And clear the
+	 * sample flags that have already been done by the PMU driver.
 	 */
-	filtered_sample_type = sample_type & ~data->sample_flags;
-	__perf_event_header__init_id(header, data, event, filtered_sample_type);
-
-	if (sample_type & (PERF_SAMPLE_IP | PERF_SAMPLE_CODE_PAGE_SIZE))
+	filtered_sample_type = sample_type;
+	filtered_sample_type |= __cond_set(sample_type, PERF_SAMPLE_CODE_PAGE_SIZE,
+					   PERF_SAMPLE_IP);
+	filtered_sample_type |= __cond_set(sample_type, PERF_SAMPLE_DATA_PAGE_SIZE |
+					   PERF_SAMPLE_PHYS_ADDR, PERF_SAMPLE_ADDR);
+	filtered_sample_type |= __cond_set(sample_type, PERF_SAMPLE_STACK_USER,
+					   PERF_SAMPLE_REGS_USER);
+	filtered_sample_type &= ~data->sample_flags;
+
+	if (filtered_sample_type == 0) {
+		/* Make sure it has the correct data->type for output */
+		data->type = event->attr.sample_type;
+		return;
+	}
+
+	__perf_event_header__init_id(data, event, filtered_sample_type);
+
+	if (filtered_sample_type & PERF_SAMPLE_IP) {
 		data->ip = perf_instruction_pointer(regs);
-
-	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
-		int size = 1;
-
-		if (filtered_sample_type & PERF_SAMPLE_CALLCHAIN)
-			data->callchain = perf_callchain(event, regs);
-
-		size += data->callchain->nr;
-
-		header->size += size * sizeof(u64);
+		data->sample_flags |= PERF_SAMPLE_IP;
 	}
 
-	if (sample_type & PERF_SAMPLE_RAW) {
-		struct perf_raw_record *raw = data->raw;
-		int size;
+	if (filtered_sample_type & PERF_SAMPLE_CALLCHAIN)
+		perf_sample_save_callchain(data, event, regs);
 
-		if (raw && (data->sample_flags & PERF_SAMPLE_RAW)) {
-			struct perf_raw_frag *frag = &raw->frag;
-			u32 sum = 0;
-
-			do {
-				sum += frag->size;
-				if (perf_raw_frag_last(frag))
-					break;
-				frag = frag->next;
-			} while (1);
-
-			size = round_up(sum + sizeof(u32), sizeof(u64));
-			raw->size = size - sizeof(u32);
-			frag->pad = raw->size - sum;
-		} else {
-			size = sizeof(u64);
-			data->raw = NULL;
-		}
-
-		header->size += size;
+	if (filtered_sample_type & PERF_SAMPLE_RAW) {
+		data->raw = NULL;
+		data->dyn_size += sizeof(u64);
+		data->sample_flags |= PERF_SAMPLE_RAW;
 	}
 
-	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
-		int size = sizeof(u64); /* nr */
-		if (data->sample_flags & PERF_SAMPLE_BRANCH_STACK) {
-			if (branch_sample_hw_index(event))
-				size += sizeof(u64);
-
-			size += data->br_stack->nr
-			      * sizeof(struct perf_branch_entry);
-		}
-		header->size += size;
+	if (filtered_sample_type & PERF_SAMPLE_BRANCH_STACK) {
+		data->br_stack = NULL;
+		data->dyn_size += sizeof(u64);
+		data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
 	}
 
-	if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER))
+	if (filtered_sample_type & PERF_SAMPLE_REGS_USER)
 		perf_sample_regs_user(&data->regs_user, regs);
 
-	if (sample_type & PERF_SAMPLE_REGS_USER) {
+	/*
+	 * It cannot use the filtered_sample_type here as REGS_USER can be set
+	 * by STACK_USER (using __cond_set() above) and we don't want to update
+	 * the dyn_size if it's not requested by users.
+	 */
+	if ((sample_type & ~data->sample_flags) & PERF_SAMPLE_REGS_USER) {
 		/* regs dump ABI info */
 		int size = sizeof(u64);
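
A note on __cond_set() above: it ORs the dependent flags d into the result
whenever any bit of s is set in flags, without a branch, because
!!(flags & s) evaluates to 0 or 1. The three call sites encode the
dependencies CODE_PAGE_SIZE -> IP, DATA_PAGE_SIZE/PHYS_ADDR -> ADDR and
STACK_USER -> REGS_USER. A worked example (values illustrative):

	u64 sample_type = PERF_SAMPLE_STACK_USER; /* user asked for the stack only */
	u64 filtered = sample_type;

	/* Dumping the user stack requires the user register state (the
	 * stack pointer), so REGS_USER is pulled in as a dependency: */
	filtered |= __cond_set(sample_type, PERF_SAMPLE_STACK_USER,
			       PERF_SAMPLE_REGS_USER);
	/* filtered == PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER */
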
@@ -7634,10 +7628,11 @@ void perf_prepare_sample(struct perf_event_header *header,
 			size += hweight64(mask) * sizeof(u64);
 		}
 
-		header->size += size;
+		data->dyn_size += size;
+		data->sample_flags |= PERF_SAMPLE_REGS_USER;
 	}
 
-	if (sample_type & PERF_SAMPLE_STACK_USER) {
+	if (filtered_sample_type & PERF_SAMPLE_STACK_USER) {
 		/*
 		 * Either we need PERF_SAMPLE_STACK_USER bit to be always
 		 * processed as the last one or have additional check added
@@ -7645,9 +7640,10 @@ void perf_prepare_sample(struct perf_event_header *header,
 		 * up the rest of the sample size.
 		 */
 		u16 stack_size = event->attr.sample_stack_user;
+		u16 header_size = perf_sample_data_size(data, event);
 		u16 size = sizeof(u64);
 
-		stack_size = perf_sample_ustack_size(stack_size, header->size,
+		stack_size = perf_sample_ustack_size(stack_size, header_size,
 						     data->regs_user.regs);
 
 		/*
@@ -7659,24 +7655,31 @@ void perf_prepare_sample(struct perf_event_header *header,
 			size += sizeof(u64) + stack_size;
 
 		data->stack_user_size = stack_size;
-		header->size += size;
+		data->dyn_size += size;
+		data->sample_flags |= PERF_SAMPLE_STACK_USER;
 	}
 
-	if (filtered_sample_type & PERF_SAMPLE_WEIGHT_TYPE)
+	if (filtered_sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
 		data->weight.full = 0;
-
-	if (filtered_sample_type & PERF_SAMPLE_DATA_SRC)
-		data->data_src.val = PERF_MEM_NA;
-
-	if (filtered_sample_type & PERF_SAMPLE_TRANSACTION)
-		data->txn = 0;
-
-	if (sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_DATA_PAGE_SIZE)) {
-		if (filtered_sample_type & PERF_SAMPLE_ADDR)
-			data->addr = 0;
+		data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
 	}
 
-	if (sample_type & PERF_SAMPLE_REGS_INTR) {
+	if (filtered_sample_type & PERF_SAMPLE_DATA_SRC) {
+		data->data_src.val = PERF_MEM_NA;
+		data->sample_flags |= PERF_SAMPLE_DATA_SRC;
+	}
+
+	if (filtered_sample_type & PERF_SAMPLE_TRANSACTION) {
+		data->txn = 0;
+		data->sample_flags |= PERF_SAMPLE_TRANSACTION;
+	}
+
+	if (filtered_sample_type & PERF_SAMPLE_ADDR) {
+		data->addr = 0;
+		data->sample_flags |= PERF_SAMPLE_ADDR;
+	}
+
+	if (filtered_sample_type & PERF_SAMPLE_REGS_INTR) {
 		/* regs dump ABI info */
 		int size = sizeof(u64);
@@ -7688,20 +7691,23 @@ void perf_prepare_sample(struct perf_event_header *header,
 			size += hweight64(mask) * sizeof(u64);
 		}
 
-		header->size += size;
+		data->dyn_size += size;
+		data->sample_flags |= PERF_SAMPLE_REGS_INTR;
 	}
 
-	if (sample_type & PERF_SAMPLE_PHYS_ADDR &&
-	    filtered_sample_type & PERF_SAMPLE_PHYS_ADDR)
+	if (filtered_sample_type & PERF_SAMPLE_PHYS_ADDR) {
 		data->phys_addr = perf_virt_to_phys(data->addr);
+		data->sample_flags |= PERF_SAMPLE_PHYS_ADDR;
+	}
 
 #ifdef CONFIG_CGROUP_PERF
-	if (sample_type & PERF_SAMPLE_CGROUP) {
+	if (filtered_sample_type & PERF_SAMPLE_CGROUP) {
 		struct cgroup *cgrp;
 
 		/* protected by RCU */
 		cgrp = task_css_check(current, perf_event_cgrp_id, 1)->cgroup;
 		data->cgroup = cgroup_id(cgrp);
+		data->sample_flags |= PERF_SAMPLE_CGROUP;
 	}
 #endif
@@ -7710,16 +7716,21 @@ void perf_prepare_sample(struct perf_event_header *header,
 	 * require PERF_SAMPLE_ADDR, kernel implicitly retrieve the data->addr,
 	 * but the value will not dump to the userspace.
 	 */
-	if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
+	if (filtered_sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) {
 		data->data_page_size = perf_get_page_size(data->addr);
+		data->sample_flags |= PERF_SAMPLE_DATA_PAGE_SIZE;
+	}
 
-	if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
+	if (filtered_sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) {
 		data->code_page_size = perf_get_page_size(data->ip);
+		data->sample_flags |= PERF_SAMPLE_CODE_PAGE_SIZE;
+	}
 
-	if (sample_type & PERF_SAMPLE_AUX) {
+	if (filtered_sample_type & PERF_SAMPLE_AUX) {
 		u64 size;
+		u16 header_size = perf_sample_data_size(data, event);
 
-		header->size += sizeof(u64); /* size */
+		header_size += sizeof(u64); /* size */
 
 		/*
 		 * Given the 16bit nature of header::size, an AUX sample can
@@ -7727,14 +7738,26 @@ void perf_prepare_sample(struct perf_event_header *header,
 		 * Make sure this doesn't happen by using up to U16_MAX bytes
 		 * per sample in total (rounded down to 8 byte boundary).
 		 */
-		size = min_t(size_t, U16_MAX - header->size,
+		size = min_t(size_t, U16_MAX - header_size,
 			     event->attr.aux_sample_size);
 		size = rounddown(size, 8);
 		size = perf_prepare_sample_aux(event, data, size);
 
-		WARN_ON_ONCE(size + header->size > U16_MAX);
-		header->size += size;
+		WARN_ON_ONCE(size + header_size > U16_MAX);
+		data->dyn_size += size + sizeof(u64); /* size above */
+		data->sample_flags |= PERF_SAMPLE_AUX;
 	}
+}
+
+void perf_prepare_header(struct perf_event_header *header,
+			 struct perf_sample_data *data,
+			 struct perf_event *event,
+			 struct pt_regs *regs)
+{
+	header->type = PERF_RECORD_SAMPLE;
+	header->size = perf_sample_data_size(data, event);
+	header->misc = perf_misc_flags(regs);
+
 	/*
 	 * If you're adding more sample types here, you likely need to do
 	 * something about the overflowing header::size, like repurpose the
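
perf_sample_data_size() is used throughout the new code but its body is not
part of this excerpt. Judging from the fields involved, it plausibly returns
the fixed part that header->size used to start from (sizeof(*header) +
event->header_size) plus the dynamic part accumulated in data->dyn_size; a
sketch under that assumption:

	static inline u16 perf_sample_data_size(struct perf_sample_data *data,
						struct perf_event *event)
	{
		u16 size = sizeof(struct perf_event_header);

		size += event->header_size;
		size += data->dyn_size;

		return size;
	}

With that, perf_prepare_header() can compute the final record size in one
place, instead of every branch of perf_prepare_sample() bumping header->size.
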
@@ -7762,7 +7785,8 @@ __perf_event_output(struct perf_event *event,
 	/* protect the callchain buffers */
 	rcu_read_lock();
 
-	perf_prepare_sample(&header, data, event, regs);
+	perf_prepare_sample(data, event, regs);
+	perf_prepare_header(&header, data, event, regs);
 
 	err = output_begin(&handle, data, event, header.size);
 	if (err)
@@ -10120,8 +10144,7 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
 	};
 
 	perf_sample_data_init(&data, 0, 0);
-	data.raw = &raw;
-	data.sample_flags |= PERF_SAMPLE_RAW;
+	perf_sample_save_raw_data(&data, &raw);
 
 	perf_trace_buf_update(record, event_type);
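
perf_tp_event() now sets up raw data with a single helper call. The
frag-walking code deleted from perf_prepare_sample() earlier in this diff
moved into that helper; a sketch reconstructed from those removed lines
(not necessarily the helper's verbatim body):

	static inline void perf_sample_save_raw_data(struct perf_sample_data *data,
						     struct perf_raw_record *raw)
	{
		struct perf_raw_frag *frag = &raw->frag;
		u32 sum = 0;
		int size;

		do {
			sum += frag->size;
			if (perf_raw_frag_last(frag))
				break;
			frag = frag->next;
		} while (1);

		size = round_up(sum + sizeof(u32), sizeof(u64));
		raw->size = size - sizeof(u32);
		frag->pad = raw->size - sum;

		data->raw = raw;
		data->dyn_size += size;
		data->sample_flags |= PERF_SAMPLE_RAW;
	}
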
@@ -10328,13 +10351,7 @@ static void bpf_overflow_handler(struct perf_event *event,
 	rcu_read_lock();
 	prog = READ_ONCE(event->prog);
 	if (prog) {
-		if (prog->call_get_stack &&
-		    (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) &&
-		    !(data->sample_flags & PERF_SAMPLE_CALLCHAIN)) {
-			data->callchain = perf_callchain(event, regs);
-			data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
-		}
-
+		perf_prepare_sample(data, event, regs);
 		ret = bpf_prog_run(prog, &ctx);
 	}
 	rcu_read_unlock();
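
With that, bpf_overflow_handler() no longer special-cases callchains: it
runs the full perf_prepare_sample() before the BPF program, so the program
observes completely prepared sample data. A sketch of a BPF program that
relies on this (names illustrative; assumes the event was opened with
PERF_SAMPLE_CALLCHAIN):

	// SPDX-License-Identifier: GPL-2.0
	#include <linux/bpf.h>
	#include <linux/bpf_perf_event.h>
	#include <bpf/bpf_helpers.h>

	SEC("perf_event")
	int filter_sample(struct bpf_perf_event_data *ctx)
	{
		__u64 ips[8];

		/* The callchain read here was already collected by
		 * perf_prepare_sample() for this very sample. */
		if (bpf_get_stack(ctx, ips, sizeof(ips), 0) <= 0)
			return 0;	/* 0: drop the sample */

		return 1;		/* non-zero: deliver it */
	}

	char LICENSE[] SEC("license") = "GPL";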