I'm trying to measure perf events on Intel Alder/Raptor Lake heterogeneous CPUs from a process manager written in C. How can I enable those events and read them inside the C program? I'm using the latest Linux kernel, v6.3, which has improved support for these processors. I've been studying the perf tool sources (perf.c and builtin-stat.c) to learn how to enable the counters, but I'm missing something. I would like to achieve something like:
/*
 * Desired usage sketch: pick a CPU and an event list on the command
 * line, program the counters once, then read/print them around each
 * unit of work.  BUG FIX: 'void main' is not a standard signature —
 * hosted C requires 'int main'; also guard against missing arguments
 * before dereferencing argv[1]/argv[2].
 */
int main(int argc, char **argv)
{
	if (argc < 3) {
		printf("Usage: sudo ./perf-api-test 0 instructions\n");
		return 1;
	}
	/* argv[1] = CPU id, argv[2] = comma-separated event names */
	struct evlist *evlist = setup_events(argv[1], argv[2], "CPU");
	if (evlist == NULL)
		return 1;
	enable_counters(evlist);
	while (true) {
		do_my_stuff();
		read_counters(evlist);
		print_counters(evlist);
	}
	return 0;
}
My API currently looks like this:
#include <linux/time64.h>
#include "util/stat.h"
#include "util/thread_map.h"
#include "util/target.h"
#include "util/evsel.h"
#include "util/evlist.h"
#include "util/counts.h"
#include "util/parse-events.h"
#include "util/cpumap.h"
#include "util/affinity.h"
#include "libminiperf.h"

/*
 * What we attach the counters to (a PID, TID or CPU list); the fields
 * are filled in by setup_events().  UINT_MAX means "no uid filter".
 */
struct target target = {
	.uid = UINT_MAX
};

/*
 * Global stat configuration shared by every counter in this file —
 * mirrors the stat_config object in perf's builtin-stat.c.
 */
struct perf_stat_config stat_config = {
	.aggr_mode = AGGR_GLOBAL,  /* aggregate values across all CPUs */
	.scale = true,             /* rescale when counters were multiplexed */
	//.unit_width = 4, /* strlen("unit") */
	//.run_count = 1,
	// .metric_only_len = METRIC_ONLY_LEN,
	// .walltime_nsecs_stats = &walltime_nsecs_stats,
	// .big_num = true,
};
/*
 * Compute r = a - b for two timespecs, normalizing tv_nsec into the
 * range [0, NSEC_PER_SEC) by borrowing one second when needed.
 * Assumes a >= b (no handling of negative results).
 */
static inline void diff_timespec(struct timespec *r, struct timespec *a,
				 struct timespec *b)
{
	long nsec = a->tv_nsec - b->tv_nsec;

	r->tv_sec = a->tv_sec - b->tv_sec;
	if (nsec < 0) {
		/* Borrow a second from the seconds field. */
		r->tv_nsec = nsec + NSEC_PER_SEC;
		r->tv_sec--;
	} else {
		r->tv_nsec = nsec;
	}
}
static int read_single_counter(struct evsel *counter, int cpu_map_idx,
int thread/*, struct timespec *rs*/)
{
//switch(counter-\>tool_event) {
/*case PERF_TOOL_DURATION_TIME: {
u64 val = rs-\>tv_nsec + rs-\>tv_sec*1000000000ULL;
struct perf_counts_values *count =
perf_counts(counter-\>counts, cpu_map_idx, thread);
count-\>ena = count-\>run = val;
count-\>val = val;
return 0;
}*/
/*case PERF_TOOL_USER_TIME:
case PERF_TOOL_SYSTEM_TIME: {*/
u64 val;
struct perf_counts_values *count =
perf_counts(counter-\>counts, cpu_map_idx, thread);
if (counter-\>tool_event == PERF_TOOL_USER_TIME)
val = ru_stats.ru_utime_usec_stat.mean;
else
val = ru_stats.ru_stime_usec_stat.mean;
count-\>ena = count-\>run = val;
count-\>val = val;
return 0;
/*}
default:
case PERF_TOOL_NONE:
return evsel__read_counter(counter, cpu_map_idx, thread);
case PERF_TOOL_MAX:
fprintf(stderr,"ERROR: This should never be reached.\\n");
return 0;
}\*/
}
static int read_counter(struct evlist \*evsel_list, struct evsel \*counter)
{
int nthreads = perf_thread_map__nr(evsel_list->core.threads);
int ncpus, cpu, thread;
if (!counter->supported)
return -ENOENT;
for (thread = 0; thread < nthreads; thread++) {
for (cpu = 0; cpu < ncpus; cpu++) {
if (!perf_counts__is_loaded(counter->counts, cpu, thread) && read_single_counter(counter, cpu, thread)) {
counter->counts->scaled = -1;
perf_counts(counter->counts, cpu, thread)->ena = 0;
perf_counts(counter->counts, cpu, thread)->run = 0;
return -1;
}
printf("DEBUG: read_counter CPU %d thread %d.\n",cpu,thread);
perf_counts__set_loaded(counter->counts, cpu, thread, false);
}
}
return 0;
}
/*
 * Read and aggregate all counters, optionally exporting per-event
 * results into the caller-provided parallel arrays (each sized to
 * num_entries(); any array may be NULL to skip that field).
 *
 * BUG FIXES:
 *  - affinity__setup() was called on an UNINITIALIZED
 *    'struct affinity *' — dereferencing a wild pointer (segfault).
 *    The affinity object was never used for iteration anyway, so it
 *    is dropped entirely.
 *  - Every counter was processed TWICE (the read loop and the
 *    counter->err loop both called perf_stat_process_counter()),
 *    double-counting into the stat machinery.  Keep a single
 *    read-and-process pass and clear counter->err there.
 */
void read_counters(struct evlist *evsel_list, const char **names, double *results, const char **units, bool *snapshot, uint64_t *enabled, uint64_t *running)
{
	struct evsel *counter;
	int ret;

	evlist__for_each_entry(evsel_list, counter) {
		ret = read_counter(evsel_list, counter);
		if (ret)
			pr_debug("failed to read counter %s\n", counter->name);
		if (ret == 0 && perf_stat_process_counter(&stat_config, counter))
			pr_warning("failed to process counter %s\n", counter->name);
		counter->err = 0;
	}

	perf_stat_merge_counters(&stat_config, evsel_list);
	perf_stat_process_percore(&stat_config, evsel_list);
	perf_stat_process_shadow_stats(&stat_config, evsel_list);

	/* Export the aggregated per-event values to the caller. */
	size_t i = 0;
	evlist__for_each_entry(evsel_list, counter) {
		int nthreads = perf_thread_map__nr(counter->core.threads);
		int ncpus, cpu, thread;

		if (target__has_cpu(&target) && !target__has_per_thread(&target))
			ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
		else
			ncpus = 1;

		uint64_t ena = 0, run = 0, val = 0;
		for (thread = 0; thread < nthreads; thread++) {
			for (cpu = 0; cpu < ncpus; cpu++) {
				val += perf_counts(counter->counts, cpu, thread)->val;
				ena += perf_counts(counter->counts, cpu, thread)->ena;
				run += perf_counts(counter->counts, cpu, thread)->run;
			}
			/* A counter can never run longer than it was enabled. */
			assert(run <= ena);
		}
		if (names)
			names[i] = counter->name;
		if (results)
			results[i] = val * counter->scale;
		if (units)
			units[i] = counter->unit;
		if (snapshot)
			snapshot[i] = counter->snapshot;
		if (enabled)
			enabled[i] = ena;
		if (running)
			running[i] = run;
		i++;
	}
}
/*
 * Copy each event's name pointer into @names (caller-allocated,
 * sized to num_entries()).  @names may be NULL, in which case the
 * function only prints the debug trace.
 */
void get_names(struct evlist *evsel_list, const char **names)
{
	struct evsel *ev;
	size_t idx = 0;

	printf("DEBUG: get_names should print the events names\n");
	evlist__for_each_entry(evsel_list, ev) {
		printf("DEBUG: get_names event=%s\n", ev->name);
		if (names)
			names[idx] = ev->name;
		idx++;
	}
	printf("DEBUG: get_names end\n");
}
/*
 * Switch every event in the list on.  Because we attach to an
 * existing task or CPU (no forked tracee), the events are created
 * disabled and must be enabled explicitly before they count.
 */
void enable_counters(struct evlist *evsel_list)
{
	evlist__enable(evsel_list);
}
/*
 * Stop every event in the list.  Counters attached to a task or CPU
 * keep running on their own; groups must be stopped before their
 * constituent counters are read if accurate ratios are needed.
 */
void disable_counters(struct evlist *evsel_list)
{
	evlist__disable(evsel_list);
}
/*
 * Build the aggregation map(s) for stat_config — mirrors the logic
 * of perf's builtin-stat.c.  AGGR_THREAD gets one aggregation slot
 * per thread; every other mode gets a CPU-indexed translation map
 * sized by the highest requested CPU number.
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int perf_stat_init_aggr_mode(struct evlist *evsel_list)
{
	int nr;
	if (stat_config.aggr_mode == AGGR_THREAD) {
		nr = perf_thread_map__nr(evsel_list->core.threads);
		stat_config.aggr_map = cpu_aggr_map__empty_new(nr);
		if (stat_config.aggr_map == NULL)
			return -ENOMEM;
		/* One empty aggr id per thread, keyed by its index. */
		for (int s = 0; s < nr; s++) {
			struct aggr_cpu_id id = aggr_cpu_id__empty();
			id.thread_idx = s;
			stat_config.aggr_map->map[s] = id;
		}
		return 0;
	}
	/*
	 * The evsel_list->cpus is the base we operate on,
	 * taking the highest cpu number to be the size of
	 * the aggregation translate cpumap.
	 */
	if (evsel_list->core.user_requested_cpus)
		nr = perf_cpu_map__max(evsel_list->core.user_requested_cpus).cpu;
	else
		nr = 0;
	stat_config.cpus_aggr_map = cpu_aggr_map__empty_new(nr + 1);
	return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
}
struct evlist\* setup_events(const char \*monitor_target, const char \*events, const char \*type)
{
/*int interval = stat_config.interval;
int times = stat_config.times;
int timeout = stat_config.timeout;*/
struct evlist \*evsel_list = NULL;
char errbuf\[BUFSIZ\];
//char msg\[BUFSIZ\];
// Assign PID or CPU depending on type
if (strcmp(type, "PID") == 0)
target.pid = monitor_target;
else if (strcmp(type, "TID") == 0)
target.tid = monitor_target;
else if (strcmp(type, "CPU") == 0)
target.cpu_list = monitor_target;
evsel_list = evlist__new();
if (evsel_list == NULL) {
pr_err("evsel_list is NULL");
return NULL;
}
int err = target__validate(&target);
if (err) {
target__strerror(&target, err, errbuf, BUFSIZ);
pr_warning("%s\n", errbuf);
}
if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide)) {
target.per_thread = true;
}
perf_stat__collect_metric_expr(evsel_list);
perf_stat__init_shadow_stats();
if (evlist__create_maps(evsel_list, &target) < 0) {
if (target__has_task(&target)) {
pr_err("Problems finding threads of monitor\n");
} else if (target__has_cpu(&target)) {
pr_err("Problems finding CPUs of monitor\n");
}
printf("DEBUG: error evlist__create_maps\n");
goto out;
}
if ((strcmp(type, "PID") == 0) | (strcmp(type, "TID") == 0)) {
thread_map__read_comms(evsel_list->core.threads);
}
if (perf_stat_init_aggr_mode(evsel_list)) {
printf("DEBUG: error perf_stat_init_aggr_mode\n");
goto out;
}
if (evlist__alloc_stats(&stat_config,evsel_list, true)) {
printf("DEBUG: error evlist__alloc_stats\n");
goto out;
}
struct affinity saved_affinity, *affinity = NULL;
if (!cpu_map__is_dummy(evsel_list->core.user_requested_cpus)) {
if (affinity__setup(&saved_affinity) < 0) {
printf("DEBUG: error affinity__setup\n");
goto out;
}
affinity = &saved_affinity;
}
struct evsel *counter;
evlist__for_each_entry(evsel_list, counter) {
counter->reset_group = false;
}
struct evlist_cpu_iterator evlist_cpu_itr;
evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity){
counter = evlist_cpu_itr.evsel;
if (create_perf_stat_counter(counter, &stat_config, &target, evlist_cpu_itr.cpu_map_idx - 1) < 0) {
printf("DEBUG: error create_perf_stat_counter\n");
goto out;
}
counter->supported = true;
}
if (evlist__apply_filters(evsel_list, &counter)) {
pr_err("failed to set filter \"%s\" on event %s with %d\n", counter->filter, evsel__name(counter), errno);
goto out;
}
/*
struct perf_evsel_config_term *err_term;
if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term))
{
pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
err_term->val.drv_cfg, perf_evsel__name(counter), errno, strerror(errno));
goto out;
}*/
return evsel_list;
out:
printf("DEBUG: go out error in setup_events\\n");
evlist__delete(evsel_list);
return NULL;
}
/*
 * Write every event's aggregated value to stdout, one line per
 * event: "<scaled value> <unit> <name>", followed by the
 * multiplexing ratio (running/enabled, in percent) whenever the
 * event did not count for its whole enabled time.
 */
void print_counters(struct evlist *evsel_list)
{
	struct evsel *ev;
	int idx = 0;

	evlist__for_each_entry(evsel_list, ev) {
		printf("Counter %d", idx);
		idx++;

		int thread_cnt = perf_thread_map__nr(ev->core.threads);
		int cpu_cnt = 1;
		if (target__has_cpu(&target) && !target__has_per_thread(&target)) {
			//ncpus = perf_evsel__nr_cpus(counter);
			cpu_cnt = perf_cpu_map__nr(evsel_list->core.all_cpus);
		}

		/* Sum all (cpu, thread) cells into one aggregate. */
		uint64_t ena = 0, run = 0, val = 0;
		for (int t = 0; t < thread_cnt; t++) {
			for (int c = 0; c < cpu_cnt; c++) {
				struct perf_counts_values *pc =
					perf_counts(ev->counts, c, t);
				val += pc->val;
				ena += pc->ena;
				run += pc->run;
			}
		}

		double scaled = val * ev->scale;
		printf("%f %s %s", scaled, ev->unit, ev->name);
		if (run != ena)
			printf(" (%.2f%%)", 100.0 * run / ena);
		printf("\n");
	}
}
/*
 * Tear everything down.  The order is load-bearing: counters are
 * disabled first, a final read drains their values (all output
 * arrays NULL, so the results are discarded), then the event fds
 * are closed, the stats storage freed, and the evlist deleted.
 */
void clean(struct evlist *evsel_list)
{
	disable_counters(evsel_list);
	read_counters(evsel_list, NULL, NULL, NULL, NULL, NULL, NULL);
	evlist__close(evsel_list);
	evlist__free_stats(evsel_list);
	evlist__delete(evsel_list);
}
int num_entries(struct evlist \*evsel_list)
{
return evsel_list-\>core.nr_entries;
}
I'm trying to measure perf events on Intel Alder/Raptor Lake heterogeneous CPUs from a process manager written in C. Now I'm getting a segmentation fault when the counters are created.