리눅스 커널의 내부 동작을 trace
주요 기능
- 1) dynamic kernel function tracing
.stack usage, call graph, counting, filters - 2) latency tracing
.interrupts, wake-up - 3) static tracepoints를 사용하여 event tracing
.scheduling, interrupts, file systems, virtual guest connections
특징
- debugfs 파일시스템을 /sys/kernel/debug에 마운트하여 사용
- gcc 컴파일러의 프로파일링 옵션(-pg)로 빌드된 커널 사용
- linux kernel 2.6.27에서 mainline에 합류
- 적은(10%) overhead
- buffered(trace) vs live tracing(trace_pipe)
- trace source로 tracepoints, kprobes를 사용
Tracer Plugins
- function: 임의의 기간동안 커널 내부 함수의 호출관계를 분석
- function_graph: 그래프 형식의 함수 관계 분석
- wakeup, wakeup_dl, wakeup_rt: wake up latency 분석
- mmiotrace: 메모리 맵의 I/O에 대한 분석
- irqsoff: interrupt latency 분석
- nop: 디버깅 기능 off
preemptoff, preemptirqsoff 기능은 커널 3.0.x 에서 제거됨.
sched_switch의 경우 deprecated되어 wakeup 기능으로 동작
1) Function Tracing
function filter: 모든 함수 → 특정 함수
# cd /sys/kernel/debug/tracing # cat set_ftrace_filter #### all functions enabled #### # echo schedule_tail >> set_ftrace_filter # cat set_ftrace_filter schedule_tail # echo 'sched*' > set_ftrace_filter # cat set_ftrace_filter schedule_on_each_cpu sched_feat_open sched_feat_show (…)
제외시킬 function: 없음 → 특정 함수
# cat set_ftrace_notrace #### no functions disabled #### # cat set_ftrace_notrace xen_pte_unlock update_persistent_clock read_persistent_clock (…)
사용가능한 tracer 목록
# cat current_tracer nop # cat available_tracers blk mmiotrace function_graph wakeup_dl wakeup_rt wakeup function nop
사용가능한 function 목록
# cat available_filter_functions run_init_process try_to_run_init_process do_one_initcall match_dev_by_uuid rootfs_mount name_to_dev_t init_linuxrc init_linuxrc calibration_delay_done calibrate_delay (...)
function trace
# cat trace | head -30 # tracer: function # entries-in-buffer/entries-written: 62862/62862 #P:4 # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / delay # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | bash-11785 [000] d... 172870.350417: sched_stat_sleep: comm=rcuos/0 pid=9 ... bash-11785 [000] d... 172870.350418: sched_wake_idle_without_ipi: cpu=1 bash-11785 [000] d... 172870.350419: sched_wakeup: comm=rcuos/0 pid=9 ... <idle>-0 [001] .N.. 172870.350436: schedule <-schedule_preempt_disabled <idle>-0 [001] dN.. 172870.350438: sched_stat_wait: comm=rcuos/0 pid=9 ...
function_graph trace
# cat trace | head -30 # tracer: function_graph # # CPU DURATION FUNCTION CALLS # | | | | | | | 3) + 11.008 us | } 1) | schedule() { 1) | /* sched_stat_wait: comm=rcuos/0 pid=9 delay=0 [ns] */ 1) | /* sched_switch: prev_comm=swapper/1 prev_pid=0 prev_prio=120 ... ------------------------------------------ 1) <idle>-0 => rcuos/0-9 ------------------------------------------
2) Latency Tracing
wakeup
# cat trace # tracer: wakeup # # wakeup latency trace v1.1.5 on 4.0.5 # -------------------------------------------------------------------- # latency: 14 us, #75/75, CPU#0 | (M:desktop VP:0, KP:0, SP:0 HP:0 #P:4) # ----------------- # | task: watchdog/0-12 (uid:0 nice:0 policy:1 rt_prio:99) # ----------------- # # _------=> CPU# # / _-----=> irqs-off # | / _----=> need-resched # || / _---=> hardirq/softirq # ||| / _--=> preempt-depth # |||| / delay # cmd pid ||||| time | caller # \ / ||||| \ | / <idle>-0 0dNh. 0us : 0:120:R + [000] 12: 0:R watchdog/0 <idle>-0 0dNh. 0us : ttwu_do_activate.constprop.92 <-try_to_wake_up <idle>-0 0dNh. 1us : task_woken_rt <-ttwu_do_wakeup <idle>-0 0dNh. 1us : _raw_spin_unlock <-try_to_wake_up <idle>-0 0dNh. 1us : ttwu_stat <-try_to_wake_up <idle>-0 0dNh. 1us : _raw_spin_unlock_irqrestore <-try_to_wake_up <idle>-0 0dNh. 1us : ktime_get <-watchdog_timer_fn <idle>-0 0dNh. 1us : hrtimer_forward <-watchdog_timer_fn <idle>-0 0dNh. 2us : _raw_spin_lock <-__run_hrtimer
wake_dl
# cat trace # tracer: wakeup_dl # # wakeup_dl latency trace v1.1.5 on 4.0.5 # -------------------------------------------------------------------- # latency: 665155 us, #0/0, CPU#2 | (M:desktop VP:0, KP:0, SP:0 HP:0 #P:4) # ----------------- # | task: Xorg-840 (uid:0 nice:0 policy:0 rt_prio:0) # ----------------- # # _------=> CPU# # / _-----=> irqs-off # | / _----=> need-resched # || / _---=> hardirq/softirq # ||| / _--=> preempt-depth # |||| / delay # cmd pid ||||| time | caller # \ / ||||| \ | /
wakeup_rt
# cat trace # tracer: wakeup_rt # # wakeup_rt latency trace v1.1.5 on 4.0.5 # -------------------------------------------------------------------- # latency: 18 us, #75/75, CPU#1 | (M:desktop VP:0, KP:0, SP:0 HP:0 #P:4) # ----------------- # | task: irq/51-iwlwifi-423 (uid:0 nice:0 policy:1 rt_prio:50) # ----------------- # # _------=> CPU# # / _-----=> irqs-off # | / _----=> need-resched # || / _---=> hardirq/softirq # ||| / _--=> preempt-depth # |||| / delay # cmd pid ||||| time | caller # \ / ||||| \ | / <idle>-0 1dNh. 0us : 0:120:R + [001] 423: 49:R irq/51-iwlwifi <idle>-0 1dNh. 0us : ttwu_do_activate.constprop.92 <-try_to_wake_up <idle>-0 1dNh. 1us : task_woken_rt <-ttwu_do_wakeup <idle>-0 1dNh. 1us : _raw_spin_unlock <-try_to_wake_up <idle>-0 1dNh. 1us : ttwu_stat <-try_to_wake_up <idle>-0 1dNh. 1us : _raw_spin_unlock_irqrestore <-try_to_wake_up <idle>-0 1dNh. 1us : add_interrupt_randomness <-handle_irq_event_percpu
3) Event Tracing
event 디렉토리
# ls events/ asoc filemap irq_vectors mac80211_msg random sock vsyscall block ftrace iwlwifi mce ras spi workqueue cfg80211 gpio iwlwifi_data migrate raw_syscalls swiotlb writeback compaction hda iwlwifi_io module rcu syscalls xen context_tracking hda_intel iwlwifi_msg napi regmap task xhci-hcd drm header_event iwlwifi_ucode net regulator thermal enable header_page jbd2 nmi rpm timer exceptions i2c kmem oom sched tlb ext4 i915 kvm pagemap scsi udp fence iommu kvmmmu power signal v4l2 filelock irq mac80211 printk skb vmscan # ls events/sched/ enable sched_process_exec sched_stat_iowait sched_wait_task filter sched_process_exit sched_stat_runtime sched_wake_idle_without_ipi sched_kthread_stop sched_process_fork sched_stat_sleep sched_wakeup sched_kthread_stop_ret sched_process_free sched_stat_wait sched_wakeup_new sched_migrate_task sched_process_hang sched_stick_numa sched_move_numa sched_process_wait sched_swap_numa sched_pi_setprio sched_stat_blocked sched_switch # ls events/sched/sched_wakeup enable filter format id trigger
event 목록
# cat available_events | head -20 mac80211:drv_return_void mac80211:drv_return_int mac80211:drv_return_bool mac80211:drv_return_u32 mac80211:drv_return_u64 mac80211:drv_start mac80211:drv_get_et_strings mac80211:drv_get_et_sset_count mac80211:drv_get_et_stats mac80211:drv_suspend mac80211:drv_resume mac80211:drv_set_wakeup mac80211:drv_stop mac80211:drv_add_interface mac80211:drv_change_interface mac80211:drv_remove_interface mac80211:drv_config mac80211:drv_bss_info_changed mac80211:drv_prepare_multicast mac80211:drv_configure_filter
4) Stack Tracing
# echo 1 > /proc/sys/kernel/stack_tracer_enabled # cat stack_trace Depth Size Location (16 entries) ----- ---- -------- 0) 2824 16 update_group_capacity+0x2c/0x270 1) 2808 8 msecs_to_jiffies+0x5/0x30 2) 2800 64 update_group_capacity+0x2c/0x270 3) 2736 384 find_busiest_group+0xfb/0x940 4) 2352 288 load_balance+0x1ac/0x8f0 5) 2064 128 pick_next_task_fair+0x488/0x8a0 6) 1936 80 __schedule+0x116/0xa10 7) 1856 8 schedule+0x37/0x90 8) 1848 184 schedule_hrtimeout_range_clock.part.26+0xfc/0x120 9) 1664 16 schedule_hrtimeout_range_clock+0x19/0x50 10) 1648 16 schedule_hrtimeout_range+0x13/0x20 11) 1632 24 poll_schedule_timeout+0x49/0x70 12) 1608 952 do_select+0x57c/0x7d0 13) 656 416 core_sys_select+0x1c7/0x2f0 14) 240 112 SyS_select+0xab/0x100 15) 128 128 system_call_fastpath+0x16/0x1b
5) Function profiling
# echo 1 > function_profile_enabled # cat trace_stat/function0 | head -15 Function Hit Time Avg s^2 -------- --- ---- --- --- __schedule 10960 647848540 us 59110.26 us 2099595 us schedule 6499 647692443 us 99660.32 us 4087623 us schedule_hrtimeout_range 1021 461369396 us 451879.9 us 5813184 us schedule_hrtimeout_range_clock 1021 461369211 us 451879.7 us 5763201 us poll_schedule_timeout 1016 454877997 us 447714.5 us 8181934 us SyS_poll 340 332641192 us 978356.4 us 105339792 us do_sys_poll 340 332640884 us 978355.5 us 105204044 us schedule_hrtimeout_range_clock 138 188387417 us 1365126 us 582984815 us SyS_select 1720 122266989 us 71085.45 us 782863.5 us core_sys_select 1720 122266182 us 71084.98 us 653943.4 us do_select 1720 122265404 us 71084.53 us 556184.2 us schedule_timeout 72 51148556 us 710396.6 us 1317174442 us SyS_read 1249 31128240 us 24922.53 us 2673959 us
* 결과 시간에서 sleep time을 제거하고 출력하려면 “echo 0 > options/sleep-time”을 사용한다.
참고
–arm에서 kprobes 사용하기(1)
–arm에서 kprobes 사용하기(2)
–Function Tracer
–FTrace