Per-cpu -4- (atomic operations)
this_cpu_cmpxchg_double()
include/linux/percpu-defs.h
/*
 * this_cpu_cmpxchg_double() - entry point for a compare-and-exchange on a
 * pair of per-cpu variables.
 *
 * Forwards all six arguments to __pcpu_double_call_return_bool() together
 * with the stem this_cpu_cmpxchg_double_, so the expression evaluates to
 * whatever that dispatcher yields.
 */
#define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
	__pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, pcp1, pcp2, oval1, oval2, nval1, nval2)
per-cpu 값인 pcp(pcp1, pcp2) 값이 old 값(oval1, oval2)과 같은 경우 pcp에 new 값(nval1, nval2)을 atomic하게 대입한다.
- cmpxchg_double() 함수와 다른 점
- 다른 cpu와 경합할 필요가 없는 per-cpu 값을 교체하기 위해 더 빠른 atomic operation을 기대한다.
- arm 아키텍처에서는 atomic operation 동작을 하는 동안만 local irq를 막는다.
- arm64 아키텍처에서는 atomic operation 동작을 하는 동안만 preemption을 막는다.
- 다른 cpu와 경합할 필요가 없는 per-cpu 값을 교체하기 위해 더 빠른 atomic operation을 기대한다.
아래 그림은 this_cpu_cmpxchg_double() 함수가 처리되는 과정을 보여준다.
__pcpu_double_call_return_bool()
include/linux/percpu-defs.h
/*
 * Special handling for cmpxchg_double. cmpxchg_double is passed two
 * percpu variables. The first has to be aligned to a double word
 * boundary and the second has to follow directly thereafter.
 * We enforce this on all architectures even if they don't support
 * a double cmpxchg instruction, since it's a cheap requirement, and it
 * avoids breaking the requirement for architectures with the instruction.
 *
 * The statement expression evaluates to the bool held in pdcrb_ret__.
 * stem is token-pasted with the value of sizeof(pcp1) (1, 2, 4 or 8) to
 * select the implementation that is called with pcp1, pcp2 and the
 * remaining variadic arguments; any other size ends up in
 * __bad_size_call_parameter().
 */
#define __pcpu_double_call_return_bool(stem, pcp1, pcp2, ...) \
({ \
	bool pdcrb_ret__; \
	__verify_pcpu_ptr(&(pcp1)); \
	BUILD_BUG_ON(sizeof(pcp1) != sizeof(pcp2)); /* both must have the same size */ \
	VM_BUG_ON((unsigned long)(&(pcp1)) % (2 * sizeof(pcp1))); /* pcp1 aligned to twice its size */ \
	VM_BUG_ON((unsigned long)(&(pcp2)) != \
		  (unsigned long)(&(pcp1)) + sizeof(pcp1)); /* pcp2 starts where pcp1 ends */ \
	switch(sizeof(pcp1)) { \
	case 1: pdcrb_ret__ = stem##1(pcp1, pcp2, __VA_ARGS__); break; \
	case 2: pdcrb_ret__ = stem##2(pcp1, pcp2, __VA_ARGS__); break; \
	case 4: pdcrb_ret__ = stem##4(pcp1, pcp2, __VA_ARGS__); break; \
	case 8: pdcrb_ret__ = stem##8(pcp1, pcp2, __VA_ARGS__); break; \
	default: \
		__bad_size_call_parameter(); break; \
	} \
	pdcrb_ret__; \
})
pcp(pcp1, pcp2) 값이 old 값(oval1, oval2)과 같은 경우 pcp에 new 값(nval1, nval2)을 대입한다.
- pcp1의 크기(1, 2, 4, 8 바이트)에 따라 인수 stem 뒤에 크기를 붙인 매크로(stem##1, stem##2, stem##4, stem##8) 중 하나를 호출한다.
- 예) stem=this_cpu_cmpxchg_double_
- this_cpu_cmpxchg_double_1, this_cpu_cmpxchg_double_2, this_cpu_cmpxchg_double_4, this_cpu_cmpxchg_double_8
- 예) stem=this_cpu_cmpxchg_double_
this_cpu_cmpxchg_double_1()
include/asm-generic/percpu.h
/*
 * Fallback definitions of the size-specific this_cpu_cmpxchg_double_N
 * macros (N = 1, 2, 4, 8). Each one is defined only when no definition
 * exists yet (#ifndef) and then expands directly to
 * this_cpu_generic_cmpxchg_double() with the same arguments.
 */
#ifndef this_cpu_cmpxchg_double_1
#define this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \
	this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
#endif
#ifndef this_cpu_cmpxchg_double_2
#define this_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \
	this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
#endif
#ifndef this_cpu_cmpxchg_double_4
#define this_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \
	this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
#endif
#ifndef this_cpu_cmpxchg_double_8
#define this_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \
	this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
#endif
pcp(pcp1, pcp2) 값이 old 값(oval1, oval2)과 같은 경우 pcp에 new 값(nval1, nval2)을 대입한다.
- 32bit arm에서는 double word를 atomic하게 처리하는 연산이 없으므로 generic 함수를 호출한다.
this_cpu_generic_cmpxchg_double()
include/asm-generic/percpu.h
/*
 * this_cpu_generic_cmpxchg_double() - run raw_cpu_generic_cmpxchg_double()
 * bracketed by raw_local_irq_save() and raw_local_irq_restore().
 *
 * The statement expression evaluates to the int result obtained from the
 * raw variant, which is stored in __ret before the restore call.
 */
#define this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
({ \
	int __ret; \
	unsigned long __flags; /* filled by raw_local_irq_save(), consumed by the restore */ \
	raw_local_irq_save(__flags); \
	__ret = raw_cpu_generic_cmpxchg_double(pcp1, pcp2, \
			oval1, oval2, nval1, nval2); \
	raw_local_irq_restore(__flags); \
	__ret; \
})
인터럽트를 잠시 비활성화 시킨 채로 pcp(pcp1, pcp2) 값이 old 값(oval1, oval2)과 같은 경우 pcp에 new 값(nval1, nval2)을 대입하고 다시 인터럽트를 원래대로 돌린다.
raw_cpu_cmpxchg_double()
include/linux/percpu-defs.h
/*
 * raw_cpu_cmpxchg_double() - raw_ flavour of the per-cpu double
 * compare-and-exchange entry point.
 *
 * Forwards all six arguments to __pcpu_double_call_return_bool() together
 * with the stem raw_cpu_cmpxchg_double_, so the expression evaluates to
 * whatever that dispatcher yields.
 */
#define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
	__pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, pcp1, pcp2, oval1, oval2, nval1, nval2)
pcp(pcp1, pcp2) 값이 old 값(oval1, oval2)과 같은 경우 pcp에 new 값(nval1, nval2)을 대입하는데 성공하면 true를 반환한다.
raw_cpu_cmpxchg_double_1()
include/asm-generic/percpu.h
/*
 * Fallback definitions of the size-specific raw_cpu_cmpxchg_double_N
 * macros (N = 1, 2, 4, 8). Each one is defined only when no definition
 * exists yet (#ifndef) and then expands directly to
 * raw_cpu_generic_cmpxchg_double() with the same arguments.
 */
#ifndef raw_cpu_cmpxchg_double_1
#define raw_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \
	raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
#endif
#ifndef raw_cpu_cmpxchg_double_2
#define raw_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \
	raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
#endif
#ifndef raw_cpu_cmpxchg_double_4
#define raw_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \
	raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
#endif
#ifndef raw_cpu_cmpxchg_double_8
#define raw_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \
	raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
#endif
pcp(pcp1, pcp2) 값이 old 값(oval1, oval2)과 같은 경우 pcp에 new 값(nval1, nval2)을 대입하는데 성공하면 true를 반환한다.
- 32bit arm에서는 double word를 atomic하게 처리하는 연산이 없으므로 generic 함수를 호출한다.
raw_cpu_generic_cmpxchg_double()
include/asm-generic/percpu.h
/*
 * raw_cpu_generic_cmpxchg_double() - plain C compare-and-exchange of two
 * per-cpu variables.
 *
 * Reads pcp1 and pcp2 through raw_cpu_read(). Only when they equal oval1
 * and oval2 respectively are nval1 and nval2 stored through
 * raw_cpu_write(). Evaluates to 1 when the stores were made, 0 otherwise.
 *
 * The macro body contains no interrupt or preemption handling of its own.
 * NOTE(review): callers presumably have to provide that protection - confirm.
 */
#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
({ \
	int __ret = 0; \
	if (raw_cpu_read(pcp1) == (oval1) && \
			 raw_cpu_read(pcp2) == (oval2)) { \
		raw_cpu_write(pcp1, nval1); \
		raw_cpu_write(pcp2, nval2); \
		__ret = 1; \
	} \
	(__ret); \
})
pcp(pcp1, pcp2) 값이 old 값(oval1, oval2)과 같은 경우 pcp에 new 값(nval1, nval2)을 대입하는데 성공하면 true를 반환한다.
raw_cpu_read()
include/linux/percpu-defs.h
/*
 * raw_cpu_read() - read a per-cpu variable.
 *
 * Expands to __pcpu_size_call_return() with the stem raw_cpu_read_ and
 * evaluates to the value that dispatcher produces for pcp.
 */
#define raw_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, pcp)
pcp 값을 반환한다.
raw_cpu_write()
include/linux/percpu-defs.h
/*
 * raw_cpu_write() - write val to a per-cpu variable.
 *
 * Expands to __pcpu_size_call() with the stem raw_cpu_write_, passing pcp
 * and val along. It is a statement and yields no value.
 */
#define raw_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, pcp, val)
pcp 값으로 val 값을 대입한다.
__pcpu_size_call_return()
include/linux/percpu-defs.h
/*
 * __pcpu_size_call_return() - size-based dispatcher that yields a value.
 *
 * stem is token-pasted with the value of sizeof(variable) (1, 2, 4 or 8)
 * and the resulting macro/function is called with variable. Its result is
 * kept in pscr_ret__, declared with typeof(variable), and becomes the
 * value of the statement expression. Any other size ends up in
 * __bad_size_call_parameter().
 */
#define __pcpu_size_call_return(stem, variable) \
({ \
	typeof(variable) pscr_ret__; \
	__verify_pcpu_ptr(&(variable)); \
	switch(sizeof(variable)) { \
	case 1: pscr_ret__ = stem##1(variable); break; \
	case 2: pscr_ret__ = stem##2(variable); break; \
	case 4: pscr_ret__ = stem##4(variable); break; \
	case 8: pscr_ret__ = stem##8(variable); break; \
	default: \
		__bad_size_call_parameter(); break; \
	} \
	pscr_ret__; \
})
variable 형의 사이즈에 따라 인수로 지정된 stem1~8을 호출하여 pcp 값을 반환한다.
- 예) stem=raw_cpu_read_
- raw_cpu_read_1, raw_cpu_read_2, raw_cpu_read_4, raw_cpu_read_8
__pcpu_size_call()
include/linux/percpu-defs.h
/*
 * __pcpu_size_call() - size-based dispatcher that yields no value.
 *
 * stem is token-pasted with the value of sizeof(variable) (1, 2, 4 or 8)
 * and the resulting macro/function is called with variable followed by
 * the variadic arguments. Any other size ends up in
 * __bad_size_call_parameter(). Wrapped in do { } while (0) so it can be
 * used as a single statement.
 */
#define __pcpu_size_call(stem, variable, ...) \
do { \
	__verify_pcpu_ptr(&(variable)); \
	switch(sizeof(variable)) { \
		case 1: stem##1(variable, __VA_ARGS__);break; \
		case 2: stem##2(variable, __VA_ARGS__);break; \
		case 4: stem##4(variable, __VA_ARGS__);break; \
		case 8: stem##8(variable, __VA_ARGS__);break; \
		default: \
			__bad_size_call_parameter();break; \
	} \
} while (0)
variable 형의 사이즈에 따라 인수로 지정된 stem1~8을 호출하여 pcp 값에 val 값을 대입한다.
- 예) stem=raw_cpu_write_
- raw_cpu_write_1, raw_cpu_write_2, raw_cpu_write_4, raw_cpu_write_8
raw_cpu_read_1()
include/asm-generic/percpu.h
/*
 * Fallback definitions of the size-specific raw_cpu_read_N macros
 * (N = 1, 2, 4, 8). Each one is defined only when no definition exists
 * yet (#ifndef). All four expand to the same expression: a dereference of
 * the pointer returned by raw_cpu_ptr(&(pcp)).
 */
#ifndef raw_cpu_read_1
#define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp)))
#endif
#ifndef raw_cpu_read_2
#define raw_cpu_read_2(pcp) (*raw_cpu_ptr(&(pcp)))
#endif
#ifndef raw_cpu_read_4
#define raw_cpu_read_4(pcp) (*raw_cpu_ptr(&(pcp)))
#endif
#ifndef raw_cpu_read_8
#define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp)))
#endif
pcp의 값을 읽어온다.
raw_cpu_write_1()
include/asm-generic/percpu.h
/*
 * Fallback definitions of the size-specific raw_cpu_write_N macros
 * (N = 1, 2, 4, 8). Each one is defined only when no definition exists
 * yet (#ifndef). All four expand to raw_cpu_generic_to_op() with the
 * assignment operator (=) as the op argument.
 */
#ifndef raw_cpu_write_1
#define raw_cpu_write_1(pcp, val) raw_cpu_generic_to_op(pcp, val, =)
#endif
#ifndef raw_cpu_write_2
#define raw_cpu_write_2(pcp, val) raw_cpu_generic_to_op(pcp, val, =)
#endif
#ifndef raw_cpu_write_4
#define raw_cpu_write_4(pcp, val) raw_cpu_generic_to_op(pcp, val, =)
#endif
#ifndef raw_cpu_write_8
#define raw_cpu_write_8(pcp, val) raw_cpu_generic_to_op(pcp, val, =)
#endif
pcp에 val 값을 저장한다.
raw_cpu_generic_to_op()
include/asm-generic/percpu.h
/*
 * raw_cpu_generic_to_op() - apply an assignment-style operator to a
 * per-cpu variable.
 *
 * Dereferences the pointer returned by raw_cpu_ptr(&(pcp)) and applies
 * "op val" to it, e.g. op "=" stores val and op "+=" adds val.
 * Note that val is expanded without surrounding parentheses.
 * Wrapped in do { } while (0) so it can be used as a single statement.
 */
#define raw_cpu_generic_to_op(pcp, val, op) \
do { \
	*raw_cpu_ptr(&(pcp)) op val; \
} while (0)
pcp와 val 값에 op 연산을 한다.
- 예) raw_cpu_generic_to_op(pcp, val, +=)
- pcp += val
참고
- per-cpu -1- (Basic) | 문c
- per-cpu -2- (초기화) | 문c
- per-cpu -3- (동적 할당) | 문c
- Per-cpu -4- (atomic operations) | 문c – 현재글