TRACE_EVENT 특징
– Static Kernel Tracing: 커널 코드안에서 static probe point가 있다.
– TRACE_EVENT 매크로는 low overhead 성능을 위해 만들어졌다.
– ftrace는 물론 perf, LTTng와 SystemTap에서도 사용된다.
TRACE_EVENT 매크로 사용
- 커널 코드내부에 tracepoint를 만들어야 한다.
- tracepoint를 hook하는 callback 펑션을 만들어야 한다.
- callback 펑션은 tracer ring buffer를 통해 가능한 빨리 데이터 기록.
- 기록된 데이터는 사람이 읽을 수 있는 데이터 형태를 갖어야 한다.
TRACE_EVENT(name, proto, args, struct, assign, print)
TRACE_EVENT 매크로 인수
.name
- tracepoint 이름
- trace_ 접두어로 함수가 만들어진다.
TRACE_EVENT(sched_switch,
.prototype
- tracepoint callbacks을 위한 prototype
TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next)
trace_sched_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next);
.args
- prototype에 매치되는 실제 데이터 인수
TP_ARGS(rq, prev, next),
#define TRACE_POINT(name, proto, args) \ void trace_##name(proto) \ { \ if (trace_##name##_active) \ callback(args); \ }
.struct (optional)
- tracepoint 로 전달될 데이터가 저장될 구조체
TP_STRUCT__entry( __array( char, prev_comm, TASK_COMM_LEN ) __field( pid_t, prev_pid ) __field( int, prev_prio ) __field( long, prev_state ) __array( char, next_comm, TASK_COMM_LEN ) __field( pid_t, next_pid ) __field( int, next_prio ) ),
struct { char prev_comm[TASK_COMM_LEN]; pid_t prev_pid; int prev_prio; long prev_state; char next_comm[TASK_COMM_LEN]; pid_t next_pid; int next_prio; };
.assign
- ring buffer에 연결된 구조체에 데이터를 옮기기 위해 사용
TP_fast_assign( memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); __entry->prev_pid = prev->pid; __entry->prev_prio = prev->prio; __entry->prev_state = prev->state; memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); __entry->next_pid = next->pid; __entry->next_prio = next->prio; ),
- 사람이 읽을 수 있는 ASCII 형태 출력
TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s ==> " \ "next_comm=%s next_pid=%d next_prio=%d", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, __entry->prev_state ? __print_flags(__entry->prev_state, "|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" }, { 64, "x" }, { 128, "W" }) : "R", __entry->next_comm, __entry->next_pid, __entry->next_prio)
format 화일
#cat /sys/kernel/debug/tracing/events/sched/sched_switch/format name: sched_switch ID: 264 format: field:unsigned short common_type; offset:0; size:2; signed:0; field:unsigned char common_flags; offset:2; size:1; signed:0; field:unsigned char common_preempt_count; offset:3; size:1; signed:0; field:int common_pid; offset:4; size:4; signed:1; field:char prev_comm[16]; offset:8; size:16; signed:1; field:pid_t prev_pid; offset:24; size:4; signed:1; field:int prev_prio; offset:28; size:4; signed:1; field:long prev_state; offset:32; size:8; signed:1; field:char next_comm[16]; offset:40; size:16; signed:1; field:pid_t next_pid; offset:56; size:4; signed:1; field:int next_prio; offset:60; size:4; signed:1; print fmt: "prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d", REC->prev_comm, REC->prev_pid, REC->prev_prio, REC->prev_state & (1024-1) ? __print_flags(REC->prev_state & (1024-1), "|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" }, { 64, "x" }, { 128, "K" }, { 256, "W" }, { 512, "P" }) : "R", REC->prev_state & 1024 ? "+" : "", REC->next_comm, REC->next_pid, REC->next_prio
trace 함수 호출 예)
static inline void context_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) { struct mm_struct *mm, *oldmm; prepare_task_switch(rq, prev, next); trace_sched_switch(rq, prev, next); mm = next->mm; oldmm = prev->active_mm;
[04:37:30.629091416] (+0.000050732) sched_switch: { cpu_id = 2 }, { prev_comm = "swapper/2", prev_tid = 0, prev_prio = 20, prev_state = 0, next_comm = "lttng", next_tid = 8347, next_prio = 20 }
TRACE_EVENT 정의된 헤더 화일
TRACE_EVENT(sched_switch, TP_PROTO(struct task_struct *prev, struct task_struct *next), TP_ARGS(prev, next), TP_STRUCT__entry( __array( char, prev_comm, TASK_COMM_LEN ) __field( pid_t, prev_pid ) __field( int, prev_prio ) __field( long, prev_state ) __array( char, next_comm, TASK_COMM_LEN ) __field( pid_t, next_pid ) __field( int, next_prio ) ), TP_fast_assign( memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); __entry->prev_pid = prev->pid; __entry->prev_prio = prev->prio; __entry->prev_state = __trace_sched_switch_state(prev); memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); __entry->next_pid = next->pid; __entry->next_prio = next->prio; ), TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, __entry->prev_state & (TASK_STATE_MAX-1) ? __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" }, { 64, "x" }, { 128, "K" }, { 256, "W" }, { 512, "P" }) : "R", __entry->prev_state & TASK_STATE_MAX ? "+" : "", __entry->next_comm, __entry->next_pid, __entry->next_prio) );
get_random_bytes() 에서의 사용 예)
drivers/char/random.c – get_random_bytes()
trace_get_random_bytes(nbytes, _RET_IP_);
include/trace/events/random.h
DEFINE_EVENT(random__get_random_bytes, get_random_bytes, TP_PROTO(int nbytes, unsigned long IP), TP_ARGS(nbytes, IP) );
DEFINE_EVENT를 통해 아래의 함수들이 생성됨
extern struct tracepoint __tracepoint_get_random_bytes; static inline void trace_get_random_bytes(); static inline void trace_get_random_bytes_rcuidle(); static inline int register_trace_get_random_bytes(); static inline int unregister_trace_get_random_bytes(); static inline void check_trace_callback_type_get_random_bytes(); static inline bool trace_get_random_bytes_enabled();
include/linux/tracepoint.h
struct tracepoint { const char *name; /* Tracepoint name */ struct static_key key; void (*regfunc)(void); void (*unregfunc)(void); struct tracepoint_func __rcu *funcs; };
#define DEFINE_EVENT(template, name, proto, args) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define DECLARE_TRACE(name, proto, args) \ __DECLARE_TRACE(name, PARAMS(proto), \ PARAMS(args), 1, \ PARAMS(void *__data, proto), \ PARAMS(__data, args))
include/linux/tracepoint.h
#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ if (static_key_false(&__tracepoint_##name.key)) \ __DO_TRACE(&__tracepoint_##name, \ TP_PROTO(data_proto), \ TP_ARGS(data_args), \ TP_CONDITION(cond),,); \ if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ rcu_read_lock_sched_notrace(); \ rcu_dereference_sched(__tracepoint_##name.funcs);\ rcu_read_unlock_sched_notrace(); \ } \ } \ __DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args), \ PARAMS(cond), PARAMS(data_proto),PARAMS(data_args)) \ static inline int \ register_trace_##name(void (*probe)(data_proto), void *data) \ { \ return tracepoint_probe_register(&__tracepoint_##name, \ (void *)probe, data); \ } \ static inline int \ unregister_trace_##name(void (*probe)(data_proto), void *data) \ { \ return tracepoint_probe_unregister(&__tracepoint_##name,\ (void *)probe, data); \ } \ static inline void \ check_trace_callback_type_##name(void (*cb)(data_proto)) \ { \ } \ static inline bool \ trace_##name##_enabled(void) \ { \ return static_key_false(&__tracepoint_##name.key); \ }
CONFIG_TRACEPOINTS 옵션을 사용하여 커널을 빌드 시 위와 같은 함수로 동작하고 그렇지 않은 경우 빈함수가 만들어지고 에러를 리턴한다.
#define __DO_TRACE(tp, proto, args, cond, prercu, postrcu) \ do { \ struct tracepoint_func *it_func_ptr; \ void *it_func; \ void *__data; \ \ if (!(cond)) \ return; \ prercu; \ rcu_read_lock_sched_notrace(); \ it_func_ptr = rcu_dereference_sched((tp)->funcs); \ if (it_func_ptr) { \ do { \ it_func = (it_func_ptr)->func; \ __data = (it_func_ptr)->data; \ ((void(*)(proto))(it_func))(args); \ } while ((++it_func_ptr)->func); \ } \ rcu_read_unlock_sched_notrace(); \ postrcu; \ } while (0)
이벤트 분석
# cat events/sched/enable 0 # echo 1 > events/sched/enable # cat set_event sched:sched_wake_idle_without_ipi sched:sched_swap_numa sched:sched_stick_numa sched:sched_move_numa sched:sched_process_hang sched:sched_pi_setprio sched:sched_stat_runtime sched:sched_stat_blocked sched:sched_stat_iowait sched:sched_stat_sleep sched:sched_stat_wait sched:sched_process_exec sched:sched_process_fork sched:sched_process_wait sched:sched_wait_task sched:sched_process_exit sched:sched_process_free sched:sched_migrate_task sched:sched_switch sched:sched_wakeup_new sched:sched_wakeup sched:sched_kthread_stop_ret sched:sched_kthread_stop # ls events/sched/ enable sched_process_exec sched_stat_iowait sched_wait_task filter sched_process_exit sched_stat_runtime sched_wake_idle_without_ipi sched_kthread_stop sched_process_fork sched_stat_sleep sched_wakeup sched_kthread_stop_ret sched_process_free sched_stat_wait sched_wakeup_new sched_migrate_task sched_process_hang sched_stick_numa sched_move_numa sched_process_wait sched_swap_numa sched_pi_setprio sched_stat_blocked sched_switch
사용 가능한 이벤트 조회
# cat /sys/kernel/debug/tracing/available_events mac80211:drv_return_void mac80211:drv_return_int (...) irq_vectors:thermal_apic_exit irq_vectors:thermal_apic_entry irq_vectors:threshold_apic_exit irq_vectors:threshold_apic_entry irq_vectors:call_function_single_exit irq_vectors:call_function_single_entry irq_vectors:call_function_exit irq_vectors:call_function_entry irq_vectors:irq_work_exit irq_vectors:irq_work_entry irq_vectors:x86_platform_ipi_exit irq_vectors:x86_platform_ipi_entry irq_vectors:error_apic_exit irq_vectors:error_apic_entry irq_vectors:spurious_apic_exit irq_vectors:spurious_apic_entry irq_vectors:reschedule_exit irq_vectors:reschedule_entry irq_vectors:local_timer_exit irq_vectors:local_timer_entry nmi:nmi_handler syscalls:sys_exit_mmap syscalls:sys_enter_mmap vsyscall:emulate_vsyscall raw_syscalls:sys_exit raw_syscalls:sys_enter mce:mce_record tlb:tlb_flush exceptions:page_fault_kernel exceptions:page_fault_user syscalls:sys_exit_unshare syscalls:sys_enter_unshare syscalls:sys_exit_set_tid_address syscalls:sys_enter_set_tid_address task:task_rename task:task_newtask (...)
참고
- Using the TRACE_EVENT() macro (1)
- Using the TRACE_EVENT() macro (2)
- Using the TRACE_EVENT() macro (3)
- Documentation/trace/events.txt
- Tracing on Linux
- How Can IOMMU Event Tracing