Per-cpu -4- (atomic operations)
this_cpu_cmpxchg_double()
include/linux/percpu-defs.h
/*
 * this_cpu_cmpxchg_double - compare-and-exchange on a pair of per-cpu variables.
 *
 * @pcp1, @pcp2:   the two per-cpu variables that form the pair
 * @oval1, @oval2: values the pair is expected to hold
 * @nval1, @nval2: values to store when both expectations match
 *
 * Pure forwarder: hands every argument to __pcpu_double_call_return_bool()
 * with the stem this_cpu_cmpxchg_double_, which then selects
 * this_cpu_cmpxchg_double_<N> from sizeof(pcp1) and yields its bool result.
 */
#define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
__pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, pcp1, pcp2, oval1, oval2, nval1, nval2)
per-cpu 값인 pcp(pcp1, pcp2) 값이 old 값(oval1, oval2)과 같은 경우 pcp에 new 값(nval1, nval2)을 atomic하게 대입한다.
- cmpxchg_double() 함수와 다른 점
- 다른 cpu와 경합할 필요가 없는 per-cpu 값을 교체하기 위해 더 빠른 atomic operation을 기대한다.
- arm 아키텍처에서는 atomic operation 동작을 하는 동안만 local irq를 막는다.
- arm64 아키텍처에서는 atomic operation 동작을 하는 동안만 preemption을 막는다.
- 다른 cpu와 경합할 필요가 없는 per-cpu 값을 교체하기 위해 더 빠른 atomic operation을 기대한다.
아래 그림은 this_cpu_cmpxchg_double() 함수가 처리되는 과정을 보여준다.

__pcpu_double_call_return_bool()
include/linux/percpu-defs.h
/*
* Special handling for cmpxchg_double. cmpxchg_double is passed two
* percpu variables. The first has to be aligned to a double word
* boundary and the second has to follow directly thereafter.
* We enforce this on all architectures even if they don't support
* a double cmpxchg instruction, since it's a cheap requirement, and it
* avoids breaking the requirement for architectures with the instruction.
*/
#define __pcpu_double_call_return_bool(stem, pcp1, pcp2, ...) \
({ \
bool pdcrb_ret__; \
__verify_pcpu_ptr(&(pcp1)); \
BUILD_BUG_ON(sizeof(pcp1) != sizeof(pcp2)); \
VM_BUG_ON((unsigned long)(&(pcp1)) % (2 * sizeof(pcp1))); \
VM_BUG_ON((unsigned long)(&(pcp2)) != \
(unsigned long)(&(pcp1)) + sizeof(pcp1)); \
switch(sizeof(pcp1)) { \
case 1: pdcrb_ret__ = stem##1(pcp1, pcp2, __VA_ARGS__); break; \
case 2: pdcrb_ret__ = stem##2(pcp1, pcp2, __VA_ARGS__); break; \
case 4: pdcrb_ret__ = stem##4(pcp1, pcp2, __VA_ARGS__); break; \
case 8: pdcrb_ret__ = stem##8(pcp1, pcp2, __VA_ARGS__); break; \
default: \
__bad_size_call_parameter(); break; \
} \
pdcrb_ret__; \
})
pcp(pcp1, pcp2) 값이 old 값(oval1, oval2)과 같은 경우 pcp에 new 값(nval1, nval2)을 대입한다.
- 데이터 길이(1, 2, 4, 8 바이트)에 따라 인수 stem 뒤에 1, 2, 4, 8을 붙인 매크로를 호출한다.
- 예) stem=this_cpu_cmpxchg_double_
- this_cpu_cmpxchg_double_1, this_cpu_cmpxchg_double_2, this_cpu_cmpxchg_double_4, this_cpu_cmpxchg_double_8
- 예) stem=this_cpu_cmpxchg_double_
this_cpu_cmpxchg_double_1()
include/asm-generic/percpu.h
/*
 * Default this_cpu_cmpxchg_double_<N> implementations for operand sizes of
 * 1, 2, 4 and 8 bytes.  A size is only defined here when nothing earlier in
 * the translation unit has already provided it; every default forwards to
 * this_cpu_generic_cmpxchg_double() with its arguments unchanged.
 */
#if !defined(this_cpu_cmpxchg_double_1)
# define this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \
	this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
#endif

#if !defined(this_cpu_cmpxchg_double_2)
# define this_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \
	this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
#endif

#if !defined(this_cpu_cmpxchg_double_4)
# define this_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \
	this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
#endif

#if !defined(this_cpu_cmpxchg_double_8)
# define this_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \
	this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
#endif
pcp(pcp1, pcp2) 값이 old 값(oval1, oval2)과 같은 경우 pcp에 new 값(nval1, nval2)을 대입한다.
- 32bit arm에서는 double word를 atomic하게 처리하는 연산이 없으므로 generic 함수를 호출한다.
this_cpu_generic_cmpxchg_double()
include/asm-generic/percpu.h
/*
 * this_cpu_generic_cmpxchg_double - generic double compare-and-exchange
 * on a pair of per-cpu variables, bracketed by an IRQ save/restore.
 *
 * Order of operations:
 *   1. raw_local_irq_save(__flags)    - stash the IRQ state in __flags
 *      (presumably also disabling local interrupts; helper is defined
 *      elsewhere - confirm)
 *   2. raw_cpu_generic_cmpxchg_double() - compare both variables against
 *      (oval1, oval2) and, on a match, store (nval1, nval2)
 *   3. raw_local_irq_restore(__flags) - put the saved IRQ state back
 *
 * Evaluates to the int result of step 2 (1 when the pair was replaced,
 * 0 otherwise).
 */
#define this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
({ \
int __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
__ret = raw_cpu_generic_cmpxchg_double(pcp1, pcp2, \
oval1, oval2, nval1, nval2); \
raw_local_irq_restore(__flags); \
__ret; \
})
인터럽트를 잠시 비활성화 시킨 채로 pcp(pcp1, pcp2) 값이 old 값(oval1, oval2)과 같은 경우 pcp에 new 값(nval1, nval2)을 대입하고 다시 인터럽트를 원래대로 돌린다.
raw_cpu_cmpxchg_double()
include/linux/percpu-defs.h
/*
 * raw_cpu_cmpxchg_double - raw variant of the per-cpu double
 * compare-and-exchange.
 *
 * @pcp1, @pcp2:   the two per-cpu variables that form the pair
 * @oval1, @oval2: values the pair is expected to hold
 * @nval1, @nval2: values to store when both expectations match
 *
 * Pure forwarder: hands every argument to __pcpu_double_call_return_bool()
 * with the stem raw_cpu_cmpxchg_double_, which then selects
 * raw_cpu_cmpxchg_double_<N> from sizeof(pcp1) and yields its bool result.
 */
#define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
__pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, pcp1, pcp2, oval1, oval2, nval1, nval2)
pcp(pcp1, pcp2) 값이 old 값(oval1, oval2)과 같은 경우 pcp에 new 값(nval1, nval2)을 대입하고 true를 반환한다. 같지 않으면 대입하지 않고 false를 반환한다.
raw_cpu_cmpxchg_double_1()
include/asm-generic/percpu.h
/*
 * Default raw_cpu_cmpxchg_double_<N> implementations for operand sizes of
 * 1, 2, 4 and 8 bytes.  A size is only defined here when nothing earlier in
 * the translation unit has already provided it; every default forwards to
 * raw_cpu_generic_cmpxchg_double() with its arguments unchanged.
 */
#if !defined(raw_cpu_cmpxchg_double_1)
# define raw_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \
	raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
#endif

#if !defined(raw_cpu_cmpxchg_double_2)
# define raw_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \
	raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
#endif

#if !defined(raw_cpu_cmpxchg_double_4)
# define raw_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \
	raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
#endif

#if !defined(raw_cpu_cmpxchg_double_8)
# define raw_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \
	raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
#endif
pcp(pcp1, pcp2) 값이 old 값(oval1, oval2)과 같은 경우 pcp에 new 값(nval1, nval2)을 대입하고 true를 반환한다. 같지 않으면 대입하지 않고 false를 반환한다.
- 32bit arm에서는 double word를 atomic하게 처리하는 연산이 없으므로 generic 함수를 호출한다.
raw_cpu_generic_cmpxchg_double()
include/asm-generic/percpu.h
/*
 * raw_cpu_generic_cmpxchg_double - generic double compare-and-exchange on
 * a pair of per-cpu variables, built from raw_cpu_read()/raw_cpu_write().
 *
 * @pcp1, @pcp2:   the two per-cpu variables that form the pair
 * @oval1, @oval2: values the pair is expected to hold
 * @nval1, @nval2: values to store when both expectations match
 *
 * pcp1 is compared first; pcp2 is only read when pcp1 already matched.
 * The new values are evaluated and written only on a full match.
 * Evaluates to 1 when the pair was replaced and 0 otherwise.
 */
#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
({									\
	int __ret = raw_cpu_read(pcp1) == (oval1) &&			\
		    raw_cpu_read(pcp2) == (oval2);			\
	if (__ret) {							\
		raw_cpu_write(pcp1, nval1);				\
		raw_cpu_write(pcp2, nval2);				\
	}								\
	__ret;								\
})
pcp(pcp1, pcp2) 값이 old 값(oval1, oval2)과 같은 경우 pcp에 new 값(nval1, nval2)을 대입하고 1(true)을 반환한다. 같지 않으면 대입하지 않고 0(false)을 반환한다.
raw_cpu_read()
include/linux/percpu-defs.h
/*
 * raw_cpu_read - read the per-cpu variable @pcp.
 *
 * Forwards to __pcpu_size_call_return() with the stem raw_cpu_read_, which
 * picks raw_cpu_read_<N> from sizeof(pcp) and evaluates to the value read.
 */
#define raw_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, pcp)
pcp 값을 반환한다.
raw_cpu_write()
include/linux/percpu-defs.h
/*
 * raw_cpu_write - store @val into the per-cpu variable @pcp.
 *
 * Forwards to __pcpu_size_call() with the stem raw_cpu_write_, which picks
 * raw_cpu_write_<N> from sizeof(pcp).  Expands to a statement, not a value.
 */
#define raw_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, pcp, val)
pcp 값으로 val 값을 대입한다.
__pcpu_size_call_return()
include/linux/percpu-defs.h
/*
 * __pcpu_size_call_return - size-dispatched per-cpu operation that
 * produces a value.
 *
 * @stem:     macro name prefix; the size of @variable in bytes (1, 2, 4
 *            or 8) is pasted onto it to pick the implementation
 * @variable: the per-cpu variable being operated on
 *
 * The statement expression evaluates to the result of stem##N(variable),
 * held in a temporary of typeof(variable).  Any other size reaches
 * __bad_size_call_parameter() (defined elsewhere; presumably a deliberate
 * build/link failure - confirm).
 */
#define __pcpu_size_call_return(stem, variable) \
({ \
typeof(variable) pscr_ret__; \
__verify_pcpu_ptr(&(variable)); \
switch(sizeof(variable)) { \
case 1: pscr_ret__ = stem##1(variable); break; \
case 2: pscr_ret__ = stem##2(variable); break; \
case 4: pscr_ret__ = stem##4(variable); break; \
case 8: pscr_ret__ = stem##8(variable); break; \
default: \
__bad_size_call_parameter(); break; \
} \
pscr_ret__; \
})
variable 형의 사이즈(1, 2, 4, 8 바이트)에 따라 인수로 지정된 stem 뒤에 1, 2, 4, 8을 붙인 매크로를 호출하여 pcp 값을 반환한다.
- 예) stem=raw_cpu_read_
- raw_cpu_read_1, raw_cpu_read_2, raw_cpu_read_4, raw_cpu_read_8
__pcpu_size_call()
include/linux/percpu-defs.h
/*
 * __pcpu_size_call - size-dispatched per-cpu operation that produces no
 * value.
 *
 * @stem:     macro name prefix; the size of @variable in bytes (1, 2, 4
 *            or 8) is pasted onto it to pick the implementation
 * @variable: the per-cpu variable being operated on
 * @...:      remaining arguments, passed through unchanged to stem##N
 *
 * Wrapped in do { } while (0) so the expansion behaves as one statement.
 * Any other size reaches __bad_size_call_parameter() (defined elsewhere;
 * presumably a deliberate build/link failure - confirm).
 */
#define __pcpu_size_call(stem, variable, ...) \
do { \
__verify_pcpu_ptr(&(variable)); \
switch(sizeof(variable)) { \
case 1: stem##1(variable, __VA_ARGS__);break; \
case 2: stem##2(variable, __VA_ARGS__);break; \
case 4: stem##4(variable, __VA_ARGS__);break; \
case 8: stem##8(variable, __VA_ARGS__);break; \
default: \
__bad_size_call_parameter();break; \
} \
} while (0)
variable 형의 사이즈(1, 2, 4, 8 바이트)에 따라 인수로 지정된 stem 뒤에 1, 2, 4, 8을 붙인 매크로를 호출하여 pcp 값에 val 값을 대입한다.
- 예) stem=raw_cpu_write_
- raw_cpu_write_1, raw_cpu_write_2, raw_cpu_write_4, raw_cpu_write_8
raw_cpu_read_1()
include/asm-generic/percpu.h
/*
 * Default raw_cpu_read_<N> implementations for operand sizes of 1, 2, 4
 * and 8 bytes.  A size is only defined here when nothing earlier in the
 * translation unit has already provided it.  Every default dereferences
 * the pointer that raw_cpu_ptr() yields for &pcp and evaluates to the
 * value stored there.
 *
 * Each directive occupies its own line: a preprocessor directive runs to
 * the end of the line, so #ifndef/#define/#endif cannot share one.
 */
#ifndef raw_cpu_read_1
#define raw_cpu_read_1(pcp)		(*raw_cpu_ptr(&(pcp)))
#endif
#ifndef raw_cpu_read_2
#define raw_cpu_read_2(pcp)		(*raw_cpu_ptr(&(pcp)))
#endif
#ifndef raw_cpu_read_4
#define raw_cpu_read_4(pcp)		(*raw_cpu_ptr(&(pcp)))
#endif
#ifndef raw_cpu_read_8
#define raw_cpu_read_8(pcp)		(*raw_cpu_ptr(&(pcp)))
#endif
pcp의 값을 읽어온다.
raw_cpu_write_1()
include/asm-generic/percpu.h
/*
 * Default raw_cpu_write_<N> implementations for operand sizes of 1, 2, 4
 * and 8 bytes.  A size is only defined here when nothing earlier in the
 * translation unit has already provided it.  Every default expands to
 * raw_cpu_generic_to_op(pcp, val, =), i.e. a plain assignment of val.
 *
 * Each directive occupies its own line: a preprocessor directive runs to
 * the end of the line, so #ifndef/#define/#endif cannot share one.
 */
#ifndef raw_cpu_write_1
#define raw_cpu_write_1(pcp, val)	raw_cpu_generic_to_op(pcp, val, =)
#endif
#ifndef raw_cpu_write_2
#define raw_cpu_write_2(pcp, val)	raw_cpu_generic_to_op(pcp, val, =)
#endif
#ifndef raw_cpu_write_4
#define raw_cpu_write_4(pcp, val)	raw_cpu_generic_to_op(pcp, val, =)
#endif
#ifndef raw_cpu_write_8
#define raw_cpu_write_8(pcp, val)	raw_cpu_generic_to_op(pcp, val, =)
#endif
pcp에 val 값을 저장한다.
raw_cpu_generic_to_op()
include/asm-generic/percpu.h
/*
 * raw_cpu_generic_to_op - apply an assignment-style operator to a per-cpu
 * variable.
 *
 * @pcp: the per-cpu variable to modify
 * @val: right-hand operand
 * @op:  operator token placed between the two (for example = or +=)
 *
 * Expands to "*raw_cpu_ptr(&(pcp)) op val;" wrapped in do { } while (0) so
 * the expansion behaves as one statement.
 */
#define raw_cpu_generic_to_op(pcp, val, op) \
do { \
*raw_cpu_ptr(&(pcp)) op val; \
} while (0)
pcp와 val 값에 op 연산을 한다.
- 예) raw_cpu_generic_to_op(pcp, val, +=)
- pcp += val
참고
- per-cpu -1- (Basic) | 문c
- per-cpu -2- (초기화) | 문c
- per-cpu -3- (동적 할당) | 문c
- Per-cpu -4- (atomic operations) | 문c – 현재글