TRACE_EVENT 특징
– Static Kernel Tracing: 커널 코드안에서 static probe point가 있다.
– TRACE_EVENT 매크로는 low overhead 성능을 위해 만들어졌다.
– ftrace는 물론 perf, LTTng와 SystemTap에서도 사용된다.
TRACE_EVENT 매크로 사용
- 커널 코드내부에 tracepoint를 만들어야 한다.
- tracepoint를 hook하는 callback 펑션을 만들어야 한다.
- callback 펑션은 tracer ring buffer를 통해 가능한 빨리 데이터 기록.
- 기록된 데이터는 사람이 읽을 수 있는 데이터 형태를 갖어야 한다.
TRACE_EVENT(name, proto, args, struct, assign, print)
TRACE_EVENT 매크로 인수
.name
.prototype
- tracepoint callbacks을 위한 prototype
TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next)
trace_sched_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next);
.args
.struct (optional)
- tracepoint 로 전달될 데이터가 저장될 구조체
TP_STRUCT__entry(
__array( char, prev_comm, TASK_COMM_LEN )
__field( pid_t, prev_pid )
__field( int, prev_prio )
__field( long, prev_state )
__array( char, next_comm, TASK_COMM_LEN )
__field( pid_t, next_pid )
__field( int, next_prio )
),
struct {
char prev_comm[TASK_COMM_LEN];
pid_t prev_pid;
int prev_prio;
long prev_state;
char next_comm[TASK_COMM_LEN];
pid_t next_pid;
int next_prio;
};
.assign
- ring buffer에 연결된 구조체에 데이터를 옮기기 위해 사용
TP_fast_assign(
memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
__entry->prev_pid = prev->pid;
__entry->prev_prio = prev->prio;
__entry->prev_state = prev->state;
memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
__entry->next_pid = next->pid;
__entry->next_prio = next->prio;
),
.print
- 사람이 읽을 수 있는 ASCII 형태 출력
TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s ==> " \
"next_comm=%s next_pid=%d next_prio=%d",
__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
__entry->prev_state ?
__print_flags(__entry->prev_state, "|",
{ 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
{ 16, "Z" }, { 32, "X" }, { 64, "x" },
{ 128, "W" }) : "R",
__entry->next_comm, __entry->next_pid, __entry->next_prio)
format 화일
#cat /sys/kernel/debug/tracing/events/sched/sched_switch/format
name: sched_switch
ID: 264
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:char prev_comm[16]; offset:8; size:16; signed:1;
field:pid_t prev_pid; offset:24; size:4; signed:1;
field:int prev_prio; offset:28; size:4; signed:1;
field:long prev_state; offset:32; size:8; signed:1;
field:char next_comm[16]; offset:40; size:16; signed:1;
field:pid_t next_pid; offset:56; size:4; signed:1;
field:int next_prio; offset:60; size:4; signed:1;
print fmt: "prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d", REC->prev_comm, REC->prev_pid, REC->prev_prio, REC->prev_state & (1024-1) ? __print_flags(REC->prev_state & (1024-1), "|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" }, { 64, "x" }, { 128, "K" }, { 256, "W" }, { 512, "P" }) : "R", REC->prev_state & 1024 ? "+" : "", REC->next_comm, REC->next_pid, REC->next_prio
trace 함수 호출 예)
static inline void
context_switch(struct rq *rq, struct task_struct *prev,
struct task_struct *next)
{
struct mm_struct *mm, *oldmm;
prepare_task_switch(rq, prev, next);
trace_sched_switch(rq, prev, next);
mm = next->mm;
oldmm = prev->active_mm;
[04:37:30.629091416] (+0.000050732) sched_switch: { cpu_id = 2 }, { prev_comm = "swapper/2", prev_tid = 0, prev_prio = 20,
prev_state = 0, next_comm = "lttng", next_tid = 8347, next_prio = 20 }
TRACE_EVENT 정의된 헤더 화일
TRACE_EVENT(sched_switch,
TP_PROTO(struct task_struct *prev,
struct task_struct *next),
TP_ARGS(prev, next),
TP_STRUCT__entry(
__array( char, prev_comm, TASK_COMM_LEN )
__field( pid_t, prev_pid )
__field( int, prev_prio )
__field( long, prev_state )
__array( char, next_comm, TASK_COMM_LEN )
__field( pid_t, next_pid )
__field( int, next_prio )
),
TP_fast_assign(
memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
__entry->prev_pid = prev->pid;
__entry->prev_prio = prev->prio;
__entry->prev_state = __trace_sched_switch_state(prev);
memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
__entry->next_pid = next->pid;
__entry->next_prio = next->prio;
),
TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
__entry->prev_state & (TASK_STATE_MAX-1) ?
__print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
{ 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
{ 16, "Z" }, { 32, "X" }, { 64, "x" },
{ 128, "K" }, { 256, "W" }, { 512, "P" }) : "R",
__entry->prev_state & TASK_STATE_MAX ? "+" : "",
__entry->next_comm, __entry->next_pid, __entry->next_prio)
);
get_random_bytes() 에서의 사용 예)
drivers/char/random.c – get_random_bytes()
trace_get_random_bytes(nbytes, _RET_IP_);
include/trace/events/random.h
DEFINE_EVENT(random__get_random_bytes, get_random_bytes,
TP_PROTO(int nbytes, unsigned long IP),
TP_ARGS(nbytes, IP)
);
DEFINE_EVENT를 통해 아래의 함수들이 생성됨
extern struct tracepoint __tracepoint_get_random_bytes;
static inline void trace_get_random_bytes();
static inline void trace_get_random_bytes_rcuidle();
static inline int register_trace_get_random_bytes();
static inline int unregister_trace_get_random_bytes();
static inline void check_trace_callback_type_get_random_bytes();
static inline bool trace_get_random_bytes_enabled();
include/linux/tracepoint.h
struct tracepoint {
const char *name; /* Tracepoint name */
struct static_key key;
void (*regfunc)(void);
void (*unregfunc)(void);
struct tracepoint_func __rcu *funcs;
};
#define DEFINE_EVENT(template, name, proto, args) \
DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define TRACE_EVENT(name, proto, args, struct, assign, print) \
DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define DECLARE_TRACE(name, proto, args) \
__DECLARE_TRACE(name, PARAMS(proto), \
PARAMS(args), 1, \
PARAMS(void *__data, proto), \
PARAMS(__data, args))
include/linux/tracepoint.h
#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
extern struct tracepoint __tracepoint_##name; \
static inline void trace_##name(proto) \
{ \
if (static_key_false(&__tracepoint_##name.key)) \
__DO_TRACE(&__tracepoint_##name, \
TP_PROTO(data_proto), \
TP_ARGS(data_args), \
TP_CONDITION(cond),,); \
if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \
rcu_read_lock_sched_notrace(); \
rcu_dereference_sched(__tracepoint_##name.funcs);\
rcu_read_unlock_sched_notrace(); \
} \
} \
__DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args), \
PARAMS(cond), PARAMS(data_proto),PARAMS(data_args)) \
static inline int \
register_trace_##name(void (*probe)(data_proto), void *data) \
{ \
return tracepoint_probe_register(&__tracepoint_##name, \
(void *)probe, data); \
} \
static inline int \
unregister_trace_##name(void (*probe)(data_proto), void *data) \
{ \
return tracepoint_probe_unregister(&__tracepoint_##name,\
(void *)probe, data); \
} \
static inline void \
check_trace_callback_type_##name(void (*cb)(data_proto)) \
{ \
} \
static inline bool \
trace_##name##_enabled(void) \
{ \
return static_key_false(&__tracepoint_##name.key); \
}
CONFIG_TRACEPOINTS 옵션을 사용하여 커널을 빌드 시 위와 같은 함수로 동작하고 그렇지 않은 경우 빈함수가 만들어지고 에러를 리턴한다.
#define __DO_TRACE(tp, proto, args, cond, prercu, postrcu) \
do { \
struct tracepoint_func *it_func_ptr; \
void *it_func; \
void *__data; \
\
if (!(cond)) \
return; \
prercu; \
rcu_read_lock_sched_notrace(); \
it_func_ptr = rcu_dereference_sched((tp)->funcs); \
if (it_func_ptr) { \
do { \
it_func = (it_func_ptr)->func; \
__data = (it_func_ptr)->data; \
((void(*)(proto))(it_func))(args); \
} while ((++it_func_ptr)->func); \
} \
rcu_read_unlock_sched_notrace(); \
postrcu; \
} while (0)
이벤트 분석
# cat events/sched/enable
0
# echo 1 > events/sched/enable
# cat set_event
sched:sched_wake_idle_without_ipi
sched:sched_swap_numa
sched:sched_stick_numa
sched:sched_move_numa
sched:sched_process_hang
sched:sched_pi_setprio
sched:sched_stat_runtime
sched:sched_stat_blocked
sched:sched_stat_iowait
sched:sched_stat_sleep
sched:sched_stat_wait
sched:sched_process_exec
sched:sched_process_fork
sched:sched_process_wait
sched:sched_wait_task
sched:sched_process_exit
sched:sched_process_free
sched:sched_migrate_task
sched:sched_switch
sched:sched_wakeup_new
sched:sched_wakeup
sched:sched_kthread_stop_ret
sched:sched_kthread_stop
# ls events/sched/
enable sched_process_exec sched_stat_iowait sched_wait_task
filter sched_process_exit sched_stat_runtime sched_wake_idle_without_ipi
sched_kthread_stop sched_process_fork sched_stat_sleep sched_wakeup
sched_kthread_stop_ret sched_process_free sched_stat_wait sched_wakeup_new
sched_migrate_task sched_process_hang sched_stick_numa
sched_move_numa sched_process_wait sched_swap_numa
sched_pi_setprio sched_stat_blocked sched_switch
사용 가능한 이벤트 조회
# cat /sys/kernel/debug/tracing/available_events
mac80211:drv_return_void
mac80211:drv_return_int
(...)
irq_vectors:thermal_apic_exit
irq_vectors:thermal_apic_entry
irq_vectors:threshold_apic_exit
irq_vectors:threshold_apic_entry
irq_vectors:call_function_single_exit
irq_vectors:call_function_single_entry
irq_vectors:call_function_exit
irq_vectors:call_function_entry
irq_vectors:irq_work_exit
irq_vectors:irq_work_entry
irq_vectors:x86_platform_ipi_exit
irq_vectors:x86_platform_ipi_entry
irq_vectors:error_apic_exit
irq_vectors:error_apic_entry
irq_vectors:spurious_apic_exit
irq_vectors:spurious_apic_entry
irq_vectors:reschedule_exit
irq_vectors:reschedule_entry
irq_vectors:local_timer_exit
irq_vectors:local_timer_entry
nmi:nmi_handler
syscalls:sys_exit_mmap
syscalls:sys_enter_mmap
vsyscall:emulate_vsyscall
raw_syscalls:sys_exit
raw_syscalls:sys_enter
mce:mce_record
tlb:tlb_flush
exceptions:page_fault_kernel
exceptions:page_fault_user
syscalls:sys_exit_unshare
syscalls:sys_enter_unshare
syscalls:sys_exit_set_tid_address
syscalls:sys_enter_set_tid_address
task:task_rename
task:task_newtask
(...)
참고