TRACE_EVENT 특징
– Static Kernel Tracing: 커널 코드 안에 static probe point가 미리 심어져 있다.
– TRACE_EVENT 매크로는 low overhead 성능을 위해 만들어졌다.
– ftrace는 물론 perf, LTTng와 SystemTap에서도 사용된다.
TRACE_EVENT 매크로 사용
- 커널 코드내부에 tracepoint를 만들어야 한다.
- tracepoint를 hook하는 callback 펑션을 만들어야 한다.
- callback 펑션은 tracer ring buffer에 가능한 한 빨리 데이터를 기록해야 한다.
- 기록된 데이터는 사람이 읽을 수 있는 형태로 출력될 수 있어야 한다.
1 | TRACE_EVENT(name, proto, args, struct , assign, print) |
TRACE_EVENT 매크로 인수
.name
- tracepoint 이름
- trace_ 접두어로 함수가 만들어진다.
1 | TRACE_EVENT(sched_switch, |
.prototype
- tracepoint callbacks을 위한 prototype
1 | TP_PROTO( struct rq *rq, struct task_struct *prev, struct task_struct *next) |
1 | trace_sched_switch( struct rq *rq, struct task_struct *prev, struct task_struct *next); |
.args
- prototype에 매치되는 실제 데이터 인수
1 | TP_ARGS(rq, prev, next), |
1 | #define TRACE_POINT(name, proto, args) \ |
2 | void trace_##name(proto) \ |
4 | if (trace_##name##_active) \ |
.struct (optional)
- tracepoint 로 전달될 데이터가 저장될 구조체
2 | __array( char , prev_comm, TASK_COMM_LEN ) |
3 | __field( pid_t, prev_pid ) |
4 | __field( int , prev_prio ) |
5 | __field( long , prev_state ) |
6 | __array( char , next_comm, TASK_COMM_LEN ) |
7 | __field( pid_t, next_pid ) |
8 | __field( int , next_prio ) |
2 | char prev_comm[TASK_COMM_LEN]; |
6 | char next_comm[TASK_COMM_LEN]; |
.assign
- ring buffer에 연결된 구조체에 데이터를 옮기기 위해 사용
2 | memcpy (__entry->next_comm, next->comm, TASK_COMM_LEN); |
3 | __entry->prev_pid = prev->pid; |
4 | __entry->prev_prio = prev->prio; |
5 | __entry->prev_state = prev->state; |
6 | memcpy (__entry->prev_comm, prev->comm, TASK_COMM_LEN); |
7 | __entry->next_pid = next->pid; |
8 | __entry->next_prio = next->prio; |
.print
- 사람이 읽을 수 있는 ASCII 형태 출력
1 | TP_printk( "prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s ==> " \ |
2 | "next_comm=%s next_pid=%d next_prio=%d" , |
3 | __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, |
5 | __print_flags(__entry->prev_state, "|" , |
6 | { 1, "S" } , { 2, "D" }, { 4, "T" }, { 8, "t" }, |
7 | { 16, "Z" }, { 32, "X" }, { 64, "x" }, |
9 | __entry->next_comm, __entry->next_pid, __entry->next_prio) |
format 파일
05 | field:unsigned short common_type; offset:0; size:2; signed:0; |
06 | field:unsigned char common_flags; offset:2; size:1; signed:0; |
07 | field:unsigned char common_preempt_count; offset:3; size:1; signed:0; |
08 | field:int common_pid; offset:4; size:4; signed:1; |
10 | field:char prev_comm[16]; offset:8; size:16; signed:1; |
11 | field:pid_t prev_pid; offset:24; size:4; signed:1; |
12 | field:int prev_prio; offset:28; size:4; signed:1; |
13 | field:long prev_state; offset:32; size:8; signed:1; |
14 | field:char next_comm[16]; offset:40; size:16; signed:1; |
15 | field:pid_t next_pid; offset:56; size:4; signed:1; |
16 | field:int next_prio; offset:60; size:4; signed:1; |
18 | print fmt: "prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d", REC->prev_comm, REC->prev_pid, REC->prev_prio, REC->prev_state & (1024-1) ? __print_flags(REC->prev_state & (1024-1), "|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" }, { 64, "x" }, { 128, "K" }, { 256, "W" }, { 512, "P" }) : "R", REC->prev_state & 1024 ? "+" : "", REC->next_comm, REC->next_pid, REC->next_prio |
trace 함수 호출 예)
02 | context_switch(struct rq *rq, struct task_struct *prev, |
03 | struct task_struct *next) |
05 | struct mm_struct *mm, *oldmm; |
07 | prepare_task_switch(rq, prev, next); |
08 | trace_sched_switch(rq, prev, next); |
10 | oldmm = prev->active_mm; |
1 | [04:37:30.629091416] (+0.000050732) sched_switch: { cpu_id = 2 }, { prev_comm = "swapper/2", prev_tid = 0, prev_prio = 20, |
2 | prev_state = 0, next_comm = "lttng", next_tid = 8347, next_prio = 20 } |
TRACE_EVENT가 정의된 헤더 파일
01 | TRACE_EVENT(sched_switch, |
03 | TP_PROTO( struct task_struct *prev, |
04 | struct task_struct *next), |
09 | __array( char , prev_comm, TASK_COMM_LEN ) |
10 | __field( pid_t, prev_pid ) |
11 | __field( int , prev_prio ) |
12 | __field( long , prev_state ) |
13 | __array( char , next_comm, TASK_COMM_LEN ) |
14 | __field( pid_t, next_pid ) |
15 | __field( int , next_prio ) |
19 | memcpy (__entry->next_comm, next->comm, TASK_COMM_LEN); |
20 | __entry->prev_pid = prev->pid; |
21 | __entry->prev_prio = prev->prio; |
22 | __entry->prev_state = __trace_sched_switch_state(prev); |
23 | memcpy (__entry->prev_comm, prev->comm, TASK_COMM_LEN); |
24 | __entry->next_pid = next->pid; |
25 | __entry->next_prio = next->prio; |
28 | TP_printk( "prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d" , |
29 | __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, |
30 | __entry->prev_state & (TASK_STATE_MAX-1) ? |
31 | __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|" , |
32 | { 1, "S" } , { 2, "D" }, { 4, "T" }, { 8, "t" }, |
33 | { 16, "Z" }, { 32, "X" }, { 64, "x" }, |
34 | { 128, "K" }, { 256, "W" }, { 512, "P" }) : "R" , |
35 | __entry->prev_state & TASK_STATE_MAX ? "+" : "" , |
36 | __entry->next_comm, __entry->next_pid, __entry->next_prio) |
get_random_bytes() 에서의 사용 예)
drivers/char/random.c – get_random_bytes()
1 | trace_get_random_bytes(nbytes, _RET_IP_); |
include/trace/events/random.h
1 | DEFINE_EVENT(random__get_random_bytes, get_random_bytes, |
2 | TP_PROTO( int nbytes, unsigned long IP), |
DEFINE_EVENT를 통해 아래의 선언 및 함수들이 생성됨
1 | extern struct tracepoint __tracepoint_get_random_bytes; |
2 | static inline void trace_get_random_bytes(); |
3 | static inline void trace_get_random_bytes_rcuidle(); |
4 | static inline int register_trace_get_random_bytes(); |
5 | static inline int unregister_trace_get_random_bytes(); |
6 | static inline void check_trace_callback_type_get_random_bytes(); |
7 | static inline bool trace_get_random_bytes_enabled(); |
include/linux/tracepoint.h
5 | void (*unregfunc)( void ); |
6 | struct tracepoint_func __rcu *funcs; |
1 | #define DEFINE_EVENT(template, name, proto, args) \ |
2 | DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) |
1 | #define TRACE_EVENT(name, proto, args, struct, assign, print) \ |
2 | DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) |
1 | #define DECLARE_TRACE(name, proto, args) \ |
2 | __DECLARE_TRACE(name, PARAMS(proto), \ |
4 | PARAMS( void *__data, proto), \ |
include/linux/tracepoint.h
01 | #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ |
02 | extern struct tracepoint __tracepoint_##name; \ |
03 | static inline void trace_##name(proto) \ |
05 | if (static_key_false(&__tracepoint_##name.key)) \ |
06 | __DO_TRACE(&__tracepoint_##name, \ |
07 | TP_PROTO(data_proto), \ |
09 | TP_CONDITION(cond),,); \ |
10 | if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ |
11 | rcu_read_lock_sched_notrace(); \ |
12 | rcu_dereference_sched(__tracepoint_##name.funcs);\ |
13 | rcu_read_unlock_sched_notrace(); \ |
16 | __DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args), \ |
17 | PARAMS(cond), PARAMS(data_proto),PARAMS(data_args)) \ |
19 | register_trace_##name( void (*probe)(data_proto), void *data) \ |
21 | return tracepoint_probe_register(&__tracepoint_##name, \ |
22 | ( void *)probe, data); \ |
25 | unregister_trace_##name( void (*probe)(data_proto), void *data) \ |
27 | return tracepoint_probe_unregister(&__tracepoint_##name,\ |
28 | ( void *)probe, data); \ |
31 | check_trace_callback_type_##name( void (*cb)(data_proto)) \ |
35 | trace_##name##_enabled( void ) \ |
37 | return static_key_false(&__tracepoint_##name.key); \ |
CONFIG_TRACEPOINTS 옵션을 사용하여 커널을 빌드한 경우 위와 같은 함수로 동작한다. 그렇지 않은 경우에는 trace_##name() 등이 빈 함수로 만들어지고, register_trace_##name()/unregister_trace_##name()은 에러(-ENOSYS)를 리턴한다.
01 | #define __DO_TRACE(tp, proto, args, cond, prercu, postrcu) \ |
03 | struct tracepoint_func *it_func_ptr; \ |
10 | rcu_read_lock_sched_notrace(); \ |
11 | it_func_ptr = rcu_dereference_sched((tp)->funcs); \ |
14 | it_func = (it_func_ptr)->func; \ |
15 | __data = (it_func_ptr)->data; \ |
16 | (( void (*)(proto))(it_func))(args); \ |
17 | } while ((++it_func_ptr)->func); \ |
19 | rcu_read_unlock_sched_notrace(); \ |
이벤트 분석
# cat events/sched/enable
0
# echo 1 > events/sched/enable
# cat set_event
sched:sched_wake_idle_without_ipi
sched:sched_swap_numa
sched:sched_stick_numa
sched:sched_move_numa
sched:sched_process_hang
sched:sched_pi_setprio
sched:sched_stat_runtime
sched:sched_stat_blocked
sched:sched_stat_iowait
sched:sched_stat_sleep
sched:sched_stat_wait
sched:sched_process_exec
sched:sched_process_fork
sched:sched_process_wait
sched:sched_wait_task
sched:sched_process_exit
sched:sched_process_free
sched:sched_migrate_task
sched:sched_switch
sched:sched_wakeup_new
sched:sched_wakeup
sched:sched_kthread_stop_ret
sched:sched_kthread_stop
# ls events/sched/
enable sched_process_exec sched_stat_iowait sched_wait_task
filter sched_process_exit sched_stat_runtime sched_wake_idle_without_ipi
sched_kthread_stop sched_process_fork sched_stat_sleep sched_wakeup
sched_kthread_stop_ret sched_process_free sched_stat_wait sched_wakeup_new
sched_migrate_task sched_process_hang sched_stick_numa
sched_move_numa sched_process_wait sched_swap_numa
sched_pi_setprio sched_stat_blocked sched_switch
사용 가능한 이벤트 조회
# cat /sys/kernel/debug/tracing/available_events
mac80211:drv_return_void
mac80211:drv_return_int
(...)
irq_vectors:thermal_apic_exit
irq_vectors:thermal_apic_entry
irq_vectors:threshold_apic_exit
irq_vectors:threshold_apic_entry
irq_vectors:call_function_single_exit
irq_vectors:call_function_single_entry
irq_vectors:call_function_exit
irq_vectors:call_function_entry
irq_vectors:irq_work_exit
irq_vectors:irq_work_entry
irq_vectors:x86_platform_ipi_exit
irq_vectors:x86_platform_ipi_entry
irq_vectors:error_apic_exit
irq_vectors:error_apic_entry
irq_vectors:spurious_apic_exit
irq_vectors:spurious_apic_entry
irq_vectors:reschedule_exit
irq_vectors:reschedule_entry
irq_vectors:local_timer_exit
irq_vectors:local_timer_entry
nmi:nmi_handler
syscalls:sys_exit_mmap
syscalls:sys_enter_mmap
vsyscall:emulate_vsyscall
raw_syscalls:sys_exit
raw_syscalls:sys_enter
mce:mce_record
tlb:tlb_flush
exceptions:page_fault_kernel
exceptions:page_fault_user
syscalls:sys_exit_unshare
syscalls:sys_enter_unshare
syscalls:sys_exit_set_tid_address
syscalls:sys_enter_set_tid_address
task:task_rename
task:task_newtask
(...)
참고