TRACE_EVENT

TRACE_EVENT 특징

– Static Kernel Tracing: 커널 코드안에서 static probe point가 있다.
– TRACE_EVENT 매크로는 low overhead 성능을 위해 만들어졌다.
– ftrace는 물론 perf, LTTng와 SystemTap에서도 사용된다.

TRACE_EVENT 매크로 사용

  • 커널 코드내부에 tracepoint를 만들어야 한다.
  • tracepoint를 hook하는 callback 펑션을 만들어야 한다.
  • callback 펑션은 tracer ring buffer를 통해 가능한 빨리 데이터 기록.
  • 기록된 데이터는 사람이 읽을 수 있는 데이터 형태를 갖어야 한다.
1TRACE_EVENT(name, proto, args, struct, assign, print)

TRACE_EVENT 매크로 인수

.name

  • tracepoint 이름
  • trace_ 접두어로 함수가 만들어진다.
    1TRACE_EVENT(sched_switch,

     

.prototype

  • tracepoint callbacks을 위한 prototype
    1TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next)
    1trace_sched_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next);

     

.args

  • prototype에 매치되는 실제 데이터 인수
    1TP_ARGS(rq, prev, next),
    1#define TRACE_POINT(name, proto, args) \
    2     void trace_##name(proto)            \
    3     {                                   \
    4             if (trace_##name##_active)  \
    5                     callback(args);     \
    6     }

     

.struct (optional)

  • tracepoint 로 전달될 데이터가 저장될 구조체
    1TP_STRUCT__entry(
    2        __array(    char,   prev_comm,  TASK_COMM_LEN   )
    3        __field(    pid_t,  prev_pid            )
    4        __field(    int,    prev_prio           )
    5        __field(    long,   prev_state          )
    6        __array(    char,   next_comm,  TASK_COMM_LEN   )
    7        __field(    pid_t,  next_pid            )
    8        __field(    int,    next_prio           )
    9    ),
    1struct {
    2        char   prev_comm[TASK_COMM_LEN];
    3        pid_t  prev_pid;
    4        int    prev_prio;
    5        long   prev_state;
    6        char   next_comm[TASK_COMM_LEN];
    7        pid_t  next_pid;
    8        int    next_prio;
    9  };

     

.assign

  • ring buffer에 연결된 구조체에 데이터를 옮기기 위해 사용
    1TP_fast_assign(
    2       memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
    3       __entry->prev_pid    = prev->pid;
    4       __entry->prev_prio   = prev->prio;
    5       __entry->prev_state  = prev->state;
    6       memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
    7       __entry->next_pid    = next->pid;
    8       __entry->next_prio   = next->prio;
    9   ),

     

.print

  • 사람이 읽을 수 있는 ASCII 형태 출력
    1TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s ==> " \
    2          "next_comm=%s next_pid=%d next_prio=%d",
    3        __entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
    4        __entry->prev_state ?
    5          __print_flags(__entry->prev_state, "|",
    6                { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
    7                { 16, "Z" }, { 32, "X" }, { 64, "x" },
    8                { 128, "W" }) : "R",
    9        __entry->next_comm, __entry->next_pid, __entry->next_prio)

 format 화일

01#cat /sys/kernel/debug/tracing/events/sched/sched_switch/format
02name: sched_switch
03ID: 264
04format:
05        field:unsigned short common_type;       offset:0;       size:2; signed:0;
06        field:unsigned char common_flags;       offset:2;       size:1; signed:0;
07        field:unsigned char common_preempt_count;       offset:3;       size:1; signed:0;
08        field:int common_pid;   offset:4;       size:4; signed:1;
09 
10        field:char prev_comm[16];       offset:8;       size:16;        signed:1;
11        field:pid_t prev_pid;   offset:24;      size:4; signed:1;
12        field:int prev_prio;    offset:28;      size:4; signed:1;
13        field:long prev_state;  offset:32;      size:8; signed:1;
14        field:char next_comm[16];       offset:40;      size:16;        signed:1;
15        field:pid_t next_pid;   offset:56;      size:4; signed:1;
16        field:int next_prio;    offset:60;      size:4; signed:1;
17 
18print fmt: "prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d", REC->prev_comm, REC->prev_pid, REC->prev_prio, REC->prev_state & (1024-1) ? __print_flags(REC->prev_state & (1024-1), "|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" }, { 64, "x" }, { 128, "K" }, { 256, "W" }, { 512, "P" }) : "R", REC->prev_state & 1024 ? "+" : "", REC->next_comm, REC->next_pid, REC->next_prio

 

trace 함수 호출 예)

01static inline void
02 context_switch(struct rq *rq, struct task_struct *prev,
03            struct task_struct *next)
04 {
05     struct mm_struct *mm, *oldmm;
06 
07     prepare_task_switch(rq, prev, next);
08     trace_sched_switch(rq, prev, next);
09     mm = next->mm;
10     oldmm = prev->active_mm;
1[04:37:30.629091416] (+0.000050732) sched_switch: { cpu_id = 2 }, { prev_comm = "swapper/2", prev_tid = 0, prev_prio = 20,
2prev_state = 0, next_comm = "lttng", next_tid = 8347, next_prio = 20 }

 

TRACE_EVENT 정의된 헤더 화일

01TRACE_EVENT(sched_switch,
02 
03        TP_PROTO(struct task_struct *prev,
04                 struct task_struct *next),
05 
06        TP_ARGS(prev, next),
07 
08        TP_STRUCT__entry(
09                __array(        char,   prev_comm,      TASK_COMM_LEN   )
10                __field(        pid_t,  prev_pid                        )
11                __field(        int,    prev_prio                       )
12                __field(        long,   prev_state                      )
13                __array(        char,   next_comm,      TASK_COMM_LEN   )
14                __field(        pid_t,  next_pid                        )
15                __field(        int,    next_prio                       )
16        ),
17 
18        TP_fast_assign(
19                memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
20                __entry->prev_pid       = prev->pid;
21                __entry->prev_prio      = prev->prio;
22                __entry->prev_state     = __trace_sched_switch_state(prev);
23                memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
24                __entry->next_pid       = next->pid;
25                __entry->next_prio      = next->prio;
26        ),
27 
28        TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
29                __entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
30                __entry->prev_state & (TASK_STATE_MAX-1) ?
31                  __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
32                                { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
33                                { 16, "Z" }, { 32, "X" }, { 64, "x" },
34                                { 128, "K" }, { 256, "W" }, { 512, "P" }) : "R",
35                __entry->prev_state & TASK_STATE_MAX ? "+" : "",
36                __entry->next_comm, __entry->next_pid, __entry->next_prio)
37);

 

get_random_bytes() 에서의 사용 예)

drivers/char/random.c – get_random_bytes()

1trace_get_random_bytes(nbytes, _RET_IP_);

 

include/trace/events/random.h

1DEFINE_EVENT(random__get_random_bytes, get_random_bytes,
2    TP_PROTO(int nbytes, unsigned long IP),
3           TP_ARGS(nbytes, IP)
4);

DEFINE_EVENT를 통해 아래의 함수들이 생성됨

1extern struct tracepoint __tracepoint_get_random_bytes;
2static inline void trace_get_random_bytes();
3static inline void trace_get_random_bytes_rcuidle();
4static inline int register_trace_get_random_bytes();
5static inline int unregister_trace_get_random_bytes();
6static inline void check_trace_callback_type_get_random_bytes();
7static inline bool trace_get_random_bytes_enabled();

 

include/linux/tracepoint.h

1struct tracepoint {
2        const char *name;               /* Tracepoint name */
3        struct static_key key;
4        void (*regfunc)(void);
5        void (*unregfunc)(void);
6        struct tracepoint_func __rcu *funcs;
7};
1#define DEFINE_EVENT(template, name, proto, args)       \
2        DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
1#define TRACE_EVENT(name, proto, args, struct, assign, print)       \
2        DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
1#define DECLARE_TRACE(name, proto, args)            \
2                    __DECLARE_TRACE(name, PARAMS(proto),        \
3        PARAMS(args), 1,                    \
4                PARAMS(void *__data, proto),                    \
5                     PARAMS(__data, args))

 

include/linux/tracepoint.h

01#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
02        extern struct tracepoint __tracepoint_##name;               \
03        static inline void trace_##name(proto)                              \
04        {                                                                           \
05                if (static_key_false(&__tracepoint_##name.key))             \
06                        __DO_TRACE(&__tracepoint_##name,                    \
07                                TP_PROTO(data_proto),                           \
08                                TP_ARGS(data_args),                             \
09                                TP_CONDITION(cond),,);                      \
10                if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {             \
11                        rcu_read_lock_sched_notrace();                      \
12                        rcu_dereference_sched(__tracepoint_##name.funcs);\
13                        rcu_read_unlock_sched_notrace();                    \
14                }                                                                   \
15        }                                                                           \
16        __DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args), \
17                PARAMS(cond), PARAMS(data_proto),PARAMS(data_args)) \
18        static inline int                                                       \
19        register_trace_##name(void (*probe)(data_proto), void *data)    \
20        {                                                                           \
21                return tracepoint_probe_register(&__tracepoint_##name, \
22                                                (void *)probe, data);           \
23        }                                                                           \
24        static inline int                                                       \
25        unregister_trace_##name(void (*probe)(data_proto), void *data)  \
26        {                                                                           \
27                return tracepoint_probe_unregister(&__tracepoint_##name,\
28                                                (void *)probe, data);           \
29        }                                                                           \
30        static inline void                                                      \
31        check_trace_callback_type_##name(void (*cb)(data_proto))    \
32        {                                                                           \
33        }                                                                           \
34        static inline bool                                                      \
35        trace_##name##_enabled(void)                                        \
36        {                                                                           \
37                return static_key_false(&__tracepoint_##name.key);      \
38        }

CONFIG_TRACEPOINTS 옵션을 사용하여 커널을 빌드 시 위와 같은 함수로 동작하고 그렇지 않은 경우 빈함수가 만들어지고 에러를 리턴한다.

01#define __DO_TRACE(tp, proto, args, cond, prercu, postrcu)          \
02        do {                                                                    \
03                struct tracepoint_func *it_func_ptr;                            \
04                void *it_func;                                                  \
05                void *__data;                                                   \
06                                                                                    \
07                if (!(cond))                                                    \
08                        return;                                                 \
09                prercu;                                                         \
10                rcu_read_lock_sched_notrace();                              \
11                it_func_ptr = rcu_dereference_sched((tp)->funcs);        \
12                if (it_func_ptr) {                                              \
13                        do {                                                    \
14                                it_func = (it_func_ptr)->func;               \
15                                __data = (it_func_ptr)->data;                \
16                                ((void(*)(proto))(it_func))(args);              \
17                        } while ((++it_func_ptr)->func);                     \
18                }                                                                   \
19                rcu_read_unlock_sched_notrace();                            \
20                postrcu;                                                        \
21        } while (0)

이벤트 분석

# cat events/sched/enable
0

# echo 1 > events/sched/enable
# cat set_event
sched:sched_wake_idle_without_ipi
sched:sched_swap_numa
sched:sched_stick_numa
sched:sched_move_numa
sched:sched_process_hang
sched:sched_pi_setprio
sched:sched_stat_runtime
sched:sched_stat_blocked
sched:sched_stat_iowait
sched:sched_stat_sleep
sched:sched_stat_wait
sched:sched_process_exec
sched:sched_process_fork
sched:sched_process_wait
sched:sched_wait_task
sched:sched_process_exit
sched:sched_process_free
sched:sched_migrate_task
sched:sched_switch
sched:sched_wakeup_new
sched:sched_wakeup
sched:sched_kthread_stop_ret
sched:sched_kthread_stop

# ls events/sched/
enable                  sched_process_exec  sched_stat_iowait   sched_wait_task
filter                  sched_process_exit  sched_stat_runtime  sched_wake_idle_without_ipi
sched_kthread_stop      sched_process_fork  sched_stat_sleep    sched_wakeup
sched_kthread_stop_ret  sched_process_free  sched_stat_wait     sched_wakeup_new
sched_migrate_task      sched_process_hang  sched_stick_numa
sched_move_numa         sched_process_wait  sched_swap_numa
sched_pi_setprio        sched_stat_blocked  sched_switch

사용 가능한 이벤트 조회

# cat /sys/kernel/debug/tracing/available_events
mac80211:drv_return_void
mac80211:drv_return_int
(...)
irq_vectors:thermal_apic_exit
irq_vectors:thermal_apic_entry
irq_vectors:threshold_apic_exit
irq_vectors:threshold_apic_entry
irq_vectors:call_function_single_exit
irq_vectors:call_function_single_entry
irq_vectors:call_function_exit
irq_vectors:call_function_entry
irq_vectors:irq_work_exit
irq_vectors:irq_work_entry
irq_vectors:x86_platform_ipi_exit
irq_vectors:x86_platform_ipi_entry
irq_vectors:error_apic_exit
irq_vectors:error_apic_entry
irq_vectors:spurious_apic_exit
irq_vectors:spurious_apic_entry
irq_vectors:reschedule_exit
irq_vectors:reschedule_entry
irq_vectors:local_timer_exit
irq_vectors:local_timer_entry
nmi:nmi_handler
syscalls:sys_exit_mmap
syscalls:sys_enter_mmap
vsyscall:emulate_vsyscall
raw_syscalls:sys_exit
raw_syscalls:sys_enter
mce:mce_record
tlb:tlb_flush
exceptions:page_fault_kernel
exceptions:page_fault_user
syscalls:sys_exit_unshare
syscalls:sys_enter_unshare
syscalls:sys_exit_set_tid_address
syscalls:sys_enter_set_tid_address
task:task_rename
task:task_newtask
(...)

 

참고

 

 

댓글 남기기