I’m trying to use perf_event_open to sample the memory access of a process. But I used the code from this question:
Using perf_event_open() to get sample address ,however the addr=0
And I did some modifications to sample another process:
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/fcntl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#define PERF_PAGES (1 + (1 << 16))
struct perf_sample {
struct perf_event_header header;
__u64 ip;
__u32 pid, tid; /* if PERF_SAMPLE_TID */
__u64 addr; /* if PERF_SAMPLE_ADDR */
__u64 weight; /* if PERF_SAMPLE_WEIGHT */
/* __u64 data_src; /* if PERF_SAMPLE_DATA_SRC */ */
__u64 phy_addr;
};
int perf_event_open(struct perf_event_attr *attr,pid_t pid,int cpu,int group_fd,unsigned long flags)
{
return syscall(__NR_perf_event_open,attr,pid,cpu,group_fd,flags);
}
void workload()
{
int i,c=0;
for(i=0;i<100000000;i++)
{
c+=i*i;
c-=i*100;
c+=i*i*i/100;
}
}
int startup()
{
struct perf_event_attr attr;
memset(&attr,0,sizeof(struct perf_event_attr));
attr.type = PERF_TYPE_RAW;
attr.size = sizeof(struct perf_event_attr);
attr.config = 0x82d0;
attr.config1 = 0;
attr.sample_period = 1000;
attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR ;
attr.disabled = 0;
//attr.inherit = 1;
//attr.inherit_thread = 1;
attr.exclude_kernel = 1;
attr.exclude_hv = 1;
attr.exclude_callchain_kernel = 1;
attr.exclude_callchain_user = 1;
attr.precise_ip = 2;
pid_t target_pid = 19284; // the process pid I want to sampe
int fd=perf_event_open(&attr,target_pid,-1,-1,0);
if(fd<0)
{
perror("Cannot open perf fd!");
return -1;
}
return fd;
}
void scan_thread(struct perf_event_mmap_page *p)
{
char *pbuf = (char *)p + p->data_offset;
__sync_synchronize();
printf("%d,n", p->data_size);
if(p->data_head == p->data_tail) {
return;
}
struct perf_event_header *ph = (void *)(pbuf + (p->data_tail % p->data_size));
struct perf_sample* ps;
switch(ph->type) {
case PERF_RECORD_SAMPLE:
ps = (struct perf_sample*)ph;
// assert(ps != NULL);
if(ps == NULL)
{
printf("nulln");
}
if(ps!= NULL && ps->addr != 0) {
printf("ip %lxn", ps->ip);
printf("tid %dn", ps->tid);
printf("addr: %lx n", ps->addr);
}
//printf("addr, %lxn", ps->addr);
//printf("phy addr, %lxn", ps->phy_addr);
break;
default:
printf("type %dn", ph->type);
break;
}
}
int main()
{
int fd = startup();
size_t mmap_size = sysconf(_SC_PAGESIZE) * PERF_PAGES;
struct perf_event_mmap_page *p = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
// start to perf
ioctl(fd,PERF_EVENT_IOC_ENABLE,0);
while(1)
{
// uint64_t instructions;
// read(fd,&instructions,sizeof(instructions));
// printf("instructions=%ldn",instructions);
// sleep(1);
workload();
scan_thread(p);
sleep(1);
}
}
This works fine when I’m sampling a single-thread process. But if the process has multiple threads, it seems that it can only sample the main thread.
I tried to set:
attr.inherit = 1;
//or
attr.inherit_thread = 1;
However, if I set one of them. The mmap to the sampling ring-buffer will fail.
Is the attribute inherit not compatible with the sampling mode? If so, how can I sample all the events of all threads in a specific process using perf_event_open?