I'm trying to measure perf events on Intel Alder/Raptor Lake heterogeneous CPUs from a process manager written in C. How can I enable those events and read them inside the C program? I'm using the latest Linux kernel, v6.3, which has improved support for these processors. I've been studying the perf tool sources (perf.c and builtin-stat.c) to learn how to enable the counters, but I'm missing something. I would like to achieve something like:
/*
 * Desired usage sketch: pick a CPU and an event list on the command
 * line, program the counters once, then read/print them around each
 * unit of work.  BUG FIX: 'void main' is not a standard signature —
 * hosted C requires 'int main'; also guard against missing arguments
 * before dereferencing argv[1]/argv[2].
 */
int main(int argc, char **argv)
{
	if (argc < 3) {
		printf("Usage: sudo ./perf-api-test 0 instructions\n");
		return 1;
	}
	/* argv[1] = CPU id, argv[2] = comma-separated event names */
	struct evlist *evlist = setup_events(argv[1], argv[2], "CPU");
	if (evlist == NULL)
		return 1;
	enable_counters(evlist);
	while (true) {
		do_my_stuff();
		read_counters(evlist);
		print_counters(evlist);
	}
	return 0;
}
My API currently looks like this:
#include <linux/time64.h>
#include "util/stat.h"
#include "util/thread_map.h"
#include "util/target.h"
#include "util/evsel.h"
#include "util/evlist.h"
#include "util/counts.h"
#include "util/parse-events.h"
#include "util/cpumap.h"
#include "util/affinity.h"
#include "libminiperf.h"

/*
 * What we attach the counters to (a PID, TID or CPU list); the fields
 * are filled in by setup_events().  UINT_MAX means "no uid filter".
 */
struct target target = {
	.uid = UINT_MAX
};

/*
 * Global stat configuration shared by every counter in this file —
 * mirrors the stat_config object in perf's builtin-stat.c.
 */
struct perf_stat_config stat_config = {
	.aggr_mode = AGGR_GLOBAL,  /* aggregate values across all CPUs */
	.scale = true,             /* rescale when counters were multiplexed */
	//.unit_width = 4, /* strlen("unit") */
	//.run_count = 1,
	// .metric_only_len = METRIC_ONLY_LEN,
	// .walltime_nsecs_stats = &walltime_nsecs_stats,
	// .big_num = true,
};
/*
 * Compute r = a - b for two timespecs, normalizing tv_nsec into the
 * range [0, NSEC_PER_SEC) by borrowing one second when needed.
 * Assumes a >= b (no handling of negative results).
 */
static inline void diff_timespec(struct timespec *r, struct timespec *a,
				 struct timespec *b)
{
	long nsec = a->tv_nsec - b->tv_nsec;

	r->tv_sec = a->tv_sec - b->tv_sec;
	if (nsec < 0) {
		/* Borrow a second from the seconds field. */
		r->tv_nsec = nsec + NSEC_PER_SEC;
		r->tv_sec--;
	} else {
		r->tv_nsec = nsec;
	}
}
static int read_single_counter(struct evsel *counter, int cpu_map_idx,
int thread/*, struct timespec *rs*/)
{
//switch(counter-\>tool_event) {
/*case PERF_TOOL_DURATION_TIME: {
u64 val = rs-\>tv_nsec + rs-\>tv_sec*1000000000ULL;
struct perf_counts_values *count =
perf_counts(counter-\>counts, cpu_map_idx, thread);
count-\>ena = count-\>run = val;
count-\>val = val;
return 0;
}*/
/*case PERF_TOOL_USER_TIME:
case PERF_TOOL_SYSTEM_TIME: {*/
u64 val;
struct perf_counts_values *count =
perf_counts(counter-\>counts, cpu_map_idx, thread);
if (counter-\>tool_event == PERF_TOOL_USER_TIME)
val = ru_stats.ru_utime_usec_stat.mean;
else
val = ru_stats.ru_stime_usec_stat.mean;
count-\>ena = count-\>run = val;
count-\>val = val;
return 0;
/*}
default:
case PERF_TOOL_NONE:
return evsel__read_counter(counter, cpu_map_idx, thread);
case PERF_TOOL_MAX:
fprintf(stderr,"ERROR: This should never be reached.\\n");
return 0;
}\*/
}
static int read_counter(struct evlist \*evsel_list, struct evsel \*counter)
{
int nthreads = perf_thread_map__nr(evsel_list->core.threads);
int ncpus, cpu, thread;
if (!counter->supported)
return -ENOENT;
for (thread = 0; thread < nthreads; thread++) {
for (cpu = 0; cpu < ncpus; cpu++) {
if (!perf_counts__is_loaded(counter->counts, cpu, thread) && read_single_counter(counter, cpu, thread)) {
counter->counts->scaled = -1;
perf_counts(counter->counts, cpu, thread)->ena = 0;
perf_counts(counter->counts, cpu, thread)->run = 0;
return -1;
}
printf("DEBUG: read_counter CPU %d thread %d.\n",cpu,thread);
perf_counts__set_loaded(counter->counts, cpu, thread, false);
}
}
return 0;
}
/*
 * Read and aggregate all counters, optionally exporting per-event
 * results into the caller-provided parallel arrays (each sized to
 * num_entries(); any array may be NULL to skip that field).
 *
 * BUG FIXES:
 *  - affinity__setup() was called on an UNINITIALIZED
 *    'struct affinity *' — dereferencing a wild pointer (segfault).
 *    The affinity object was never used for iteration anyway, so it
 *    is dropped entirely.
 *  - Every counter was processed TWICE (the read loop and the
 *    counter->err loop both called perf_stat_process_counter()),
 *    double-counting into the stat machinery.  Keep a single
 *    read-and-process pass and clear counter->err there.
 */
void read_counters(struct evlist *evsel_list, const char **names, double *results, const char **units, bool *snapshot, uint64_t *enabled, uint64_t *running)
{
	struct evsel *counter;
	int ret;

	evlist__for_each_entry(evsel_list, counter) {
		ret = read_counter(evsel_list, counter);
		if (ret)
			pr_debug("failed to read counter %s\n", counter->name);
		if (ret == 0 && perf_stat_process_counter(&stat_config, counter))
			pr_warning("failed to process counter %s\n", counter->name);
		counter->err = 0;
	}

	perf_stat_merge_counters(&stat_config, evsel_list);
	perf_stat_process_percore(&stat_config, evsel_list);
	perf_stat_process_shadow_stats(&stat_config, evsel_list);

	/* Export the aggregated per-event values to the caller. */
	size_t i = 0;
	evlist__for_each_entry(evsel_list, counter) {
		int nthreads = perf_thread_map__nr(counter->core.threads);
		int ncpus, cpu, thread;

		if (target__has_cpu(&target) && !target__has_per_thread(&target))
			ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
		else
			ncpus = 1;

		uint64_t ena = 0, run = 0, val = 0;
		for (thread = 0; thread < nthreads; thread++) {
			for (cpu = 0; cpu < ncpus; cpu++) {
				val += perf_counts(counter->counts, cpu, thread)->val;
				ena += perf_counts(counter->counts, cpu, thread)->ena;
				run += perf_counts(counter->counts, cpu, thread)->run;
			}
			/* A counter can never run longer than it was enabled. */
			assert(run <= ena);
		}
		if (names)
			names[i] = counter->name;
		if (results)
			results[i] = val * counter->scale;
		if (units)
			units[i] = counter->unit;
		if (snapshot)
			snapshot[i] = counter->snapshot;
		if (enabled)
			enabled[i] = ena;
		if (running)
			running[i] = run;
		i++;
	}
}
/*
 * Copy each event's name pointer into @names (caller-allocated,
 * sized to num_entries()).  @names may be NULL, in which case the
 * function only prints the debug trace.
 */
void get_names(struct evlist *evsel_list, const char **names)
{
	struct evsel *ev;
	size_t idx = 0;

	printf("DEBUG: get_names should print the events names\n");
	evlist__for_each_entry(evsel_list, ev) {
		printf("DEBUG: get_names event=%s\n", ev->name);
		if (names)
			names[idx] = ev->name;
		idx++;
	}
	printf("DEBUG: get_names end\n");
}
/*
 * Switch every event in the list on.  Because we attach to an
 * existing task or CPU (no forked tracee), the events are created
 * disabled and must be enabled explicitly before they count.
 */
void enable_counters(struct evlist *evsel_list)
{
	evlist__enable(evsel_list);
}
/*
 * Stop every event in the list.  Counters attached to a task or CPU
 * keep running on their own; groups must be stopped before their
 * constituent counters are read if accurate ratios are needed.
 */
void disable_counters(struct evlist *evsel_list)
{
	evlist__disable(evsel_list);
}
/*
 * Build the aggregation map(s) for stat_config — mirrors the logic
 * of perf's builtin-stat.c.  AGGR_THREAD gets one aggregation slot
 * per thread; every other mode gets a CPU-indexed translation map
 * sized by the highest requested CPU number.
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int perf_stat_init_aggr_mode(struct evlist *evsel_list)
{
	int nr;
	if (stat_config.aggr_mode == AGGR_THREAD) {
		nr = perf_thread_map__nr(evsel_list->core.threads);
		stat_config.aggr_map = cpu_aggr_map__empty_new(nr);
		if (stat_config.aggr_map == NULL)
			return -ENOMEM;
		/* One empty aggr id per thread, keyed by its index. */
		for (int s = 0; s < nr; s++) {
			struct aggr_cpu_id id = aggr_cpu_id__empty();
			id.thread_idx = s;
			stat_config.aggr_map->map[s] = id;
		}
		return 0;
	}
	/*
	 * The evsel_list->cpus is the base we operate on,
	 * taking the highest cpu number to be the size of
	 * the aggregation translate cpumap.
	 */
	if (evsel_list->core.user_requested_cpus)
		nr = perf_cpu_map__max(evsel_list->core.user_requested_cpus).cpu;
	else
		nr = 0;
	stat_config.cpus_aggr_map = cpu_aggr_map__empty_new(nr + 1);
	return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
}
struct evlist\* setup_events(const char \*monitor_target, const char \*events, const char \*type)
{
/*int interval = stat_config.interval;
int times = stat_config.times;
int timeout = stat_config.timeout;*/
struct evlist \*evsel_list = NULL;
char errbuf\[BUFSIZ\];
//char msg\[BUFSIZ\];
// Assign PID or CPU depending on type
if (strcmp(type, "PID") == 0)
target.pid = monitor_target;
else if (strcmp(type, "TID") == 0)
target.tid = monitor_target;
else if (strcmp(type, "CPU") == 0)
target.cpu_list = monitor_target;
evsel_list = evlist__new();
if (evsel_list == NULL) {
pr_err("evsel_list is NULL");
return NULL;
}
int err = target__validate(&target);
if (err) {
target__strerror(&target, err, errbuf, BUFSIZ);
pr_warning("%s\n", errbuf);
}
if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide)) {
target.per_thread = true;
}
perf_stat__collect_metric_expr(evsel_list);
perf_stat__init_shadow_stats();
if (evlist__create_maps(evsel_list, &target) < 0) {
if (target__has_task(&target)) {
pr_err("Problems finding threads of monitor\n");
} else if (target__has_cpu(&target)) {
pr_err("Problems finding CPUs of monitor\n");
}
printf("DEBUG: error evlist__create_maps\n");
goto out;
}
if ((strcmp(type, "PID") == 0) | (strcmp(type, "TID") == 0)) {
thread_map__read_comms(evsel_list->core.threads);
}
if (perf_stat_init_aggr_mode(evsel_list)) {
printf("DEBUG: error perf_stat_init_aggr_mode\n");
goto out;
}
if (evlist__alloc_stats(&stat_config,evsel_list, true)) {
printf("DEBUG: error evlist__alloc_stats\n");
goto out;
}
struct affinity saved_affinity, *affinity = NULL;
if (!cpu_map__is_dummy(evsel_list->core.user_requested_cpus)) {
if (affinity__setup(&saved_affinity) < 0) {
printf("DEBUG: error affinity__setup\n");
goto out;
}
affinity = &saved_affinity;
}
struct evsel *counter;
evlist__for_each_entry(evsel_list, counter) {
counter->reset_group = false;
}
struct evlist_cpu_iterator evlist_cpu_itr;
evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity){
counter = evlist_cpu_itr.evsel;
if (create_perf_stat_counter(counter, &stat_config, &target, evlist_cpu_itr.cpu_map_idx - 1) < 0) {
printf("DEBUG: error create_perf_stat_counter\n");
goto out;
}
counter->supported = true;
}
if (evlist__apply_filters(evsel_list, &counter)) {
pr_err("failed to set filter \"%s\" on event %s with %d\n", counter->filter, evsel__name(counter), errno);
goto out;
}
/*
struct perf_evsel_config_term *err_term;
if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term))
{
pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
err_term->val.drv_cfg, perf_evsel__name(counter), errno, strerror(errno));
goto out;
}*/
return evsel_list;
out:
printf("DEBUG: go out error in setup_events\\n");
evlist__delete(evsel_list);
return NULL;
}
/*
 * Write every event's aggregated value to stdout, one line per
 * event: "<scaled value> <unit> <name>", followed by the
 * multiplexing ratio (running/enabled, in percent) whenever the
 * event did not count for its whole enabled time.
 */
void print_counters(struct evlist *evsel_list)
{
	struct evsel *ev;
	int idx = 0;

	evlist__for_each_entry(evsel_list, ev) {
		printf("Counter %d", idx);
		idx++;

		int thread_cnt = perf_thread_map__nr(ev->core.threads);
		int cpu_cnt = 1;
		if (target__has_cpu(&target) && !target__has_per_thread(&target)) {
			//ncpus = perf_evsel__nr_cpus(counter);
			cpu_cnt = perf_cpu_map__nr(evsel_list->core.all_cpus);
		}

		/* Sum all (cpu, thread) cells into one aggregate. */
		uint64_t ena = 0, run = 0, val = 0;
		for (int t = 0; t < thread_cnt; t++) {
			for (int c = 0; c < cpu_cnt; c++) {
				struct perf_counts_values *pc =
					perf_counts(ev->counts, c, t);
				val += pc->val;
				ena += pc->ena;
				run += pc->run;
			}
		}

		double scaled = val * ev->scale;
		printf("%f %s %s", scaled, ev->unit, ev->name);
		if (run != ena)
			printf(" (%.2f%%)", 100.0 * run / ena);
		printf("\n");
	}
}
/*
 * Tear everything down.  The order is load-bearing: counters are
 * disabled first, a final read drains their values (all output
 * arrays NULL, so the results are discarded), then the event fds
 * are closed, the stats storage freed, and the evlist deleted.
 */
void clean(struct evlist *evsel_list)
{
	disable_counters(evsel_list);
	read_counters(evsel_list, NULL, NULL, NULL, NULL, NULL, NULL);
	evlist__close(evsel_list);
	evlist__free_stats(evsel_list);
	evlist__delete(evsel_list);
}
int num_entries(struct evlist \*evsel_list)
{
return evsel_list-\>core.nr_entries;
}
I'm trying to measure perf events on Intel Alder/Raptor Lake heterogeneous CPUs from a process manager written in C. Now I'm getting a segmentation fault when the counters are created.