<kernel v5.0>
Swap 엔트리
리눅스 커널의 Swap 엔트리 관리는 커널 v4.20-rc1에서 Radix Tree Exceptional 방식에서 XArray를 사용하는 방식으로 변경되었다.
swap_entry_t 구조
다음 그림은 32비트 시스템에서 운영하는 swap 엔트리 구조를 보여준다.
- 이 값은 arm h/w pte 엔트리가 아니라 arm 리눅스 pte 엔트리에 저장되며 이 페이지가 swap 엔트리임을 의미한다.
다음 그림은 64비트 시스템에서 운영하는 swap 엔트리 구조를 보여준다.
swap_entry_t 구조체
include/linux/mm_types.h
/*
 * A swap entry has to fit into a "unsigned long", as the entry is hidden
 * in the "index" field of the swapper address space.
 */
typedef struct {
	unsigned long val;	/* the whole entry packed into one word */
} swp_entry_t;
아키텍처 독립 Swap 엔트리
swp_entry()
include/linux/swapops.h
/*
 * Store a type+offset into a swp_entry_t in an arch-independent format.
 *
 * @type:   swap type; shifted up by SWP_TYPE_SHIFT.
 * @offset: offset within that swap type; bits outside SWP_OFFSET_MASK are
 *          dropped.
 *
 * Returns the swp_entry_t whose val is the OR of the two fields.
 */
static inline swp_entry_t swp_entry(unsigned long type, pgoff_t offset)
{
	swp_entry_t ret;

	/* SWP_TYPE_SHIFT is an object-like macro, so it takes no argument. */
	ret.val = (type << SWP_TYPE_SHIFT) | (offset & SWP_OFFSET_MASK);
	return ret;
}
offset 값과 type 값으로 swap 엔트리를 구성한다.
swp_type()
include/linux/swapops.h
/*
 * Return the `type' field of an arch-independent swp_entry_t, i.e. the
 * bits of entry.val at and above SWP_TYPE_SHIFT.
 */
static inline unsigned swp_type(swp_entry_t entry)
{
	unsigned long type_bits = entry.val >> SWP_TYPE_SHIFT;

	return type_bits;
}
swap 엔트리에서 type 값을 반환한다.
swp_offset()
include/linux/swapops.h
/*
 * Return the `offset' field of an arch-independent swp_entry_t, i.e. the
 * bits of entry.val selected by SWP_OFFSET_MASK.
 */
static inline pgoff_t swp_offset(swp_entry_t entry)
{
	const unsigned long offset_bits = entry.val & SWP_OFFSET_MASK;

	return offset_bits;
}
swap 엔트리에서 offset 값을 반환한다.
SWP_TYPE_SHIFT & SWP_OFFSET_MASK
include/linux/swapops.h
/*
 * swapcache pages are stored in the swapper_space radix tree.  We want to
 * get good packing density in that tree, so the index should be dense in
 * the low-order bits.
 *
 * We arrange the `type' and `offset' fields so that `type' is at the seven
 * high-order bits of the swp_entry_t and `offset' is right-aligned in the
 * remaining bits.  Although `type' itself needs only five bits, we allow for
 * shmem/tmpfs to shift it all up a further two bits: see swp_to_radix_entry().
 *
 * swp_entry_t's are *never* stored anywhere in their arch-dependent format.
 */
/* Bit position at which the `type' field starts. */
#define SWP_TYPE_SHIFT	(BITS_PER_XA_VALUE - MAX_SWAPFILES_SHIFT)
/* Mask selecting every bit below SWP_TYPE_SHIFT (the `offset' field). */
#define SWP_OFFSET_MASK	((1UL << SWP_TYPE_SHIFT) - 1)
매크로 상수
include/linux/swap.h
/*
 * MAX_SWAPFILES defines the maximum number of swaptypes: things which can
 * be swapped to.  The swap type and the offset into that swap type are
 * encoded into pte's and into pgoff_t's in the swapcache.  Using five bits
 * for the type means that the maximum number of swapcache pages is 27 bits
 * on 32-bit-pgoff_t architectures.  And that assumes that the architecture packs
 * the type/offset into the pte as 5/27 as well.
 */
#define MAX_SWAPFILES_SHIFT 5
include/linux/swap.h
/*
 * NUMA node memory migration support
 *
 * With CONFIG_MIGRATION, two migration types (read and write) are numbered
 * starting at MAX_SWAPFILES + SWP_HWPOISON_NUM.  Without it the count is 0
 * and the two type constants are not defined.
 */
#ifdef CONFIG_MIGRATION
#define SWP_MIGRATION_NUM 2
#define SWP_MIGRATION_READ	(MAX_SWAPFILES + SWP_HWPOISON_NUM)
#define SWP_MIGRATION_WRITE	(MAX_SWAPFILES + SWP_HWPOISON_NUM + 1)
#else
#define SWP_MIGRATION_NUM 0
#endif
ARM32 Swap 엔트리
__swp_entry()
arch/arm/include/asm/pgtable.h
/*
 * Build an arch-format swap entry: `type' is shifted left by
 * __SWP_TYPE_SHIFT, `offset' by __SWP_OFFSET_SHIFT, and the two are OR'ed
 * into the single member of a swp_entry_t.  Neither argument is masked here.
 */
#define __swp_entry(type,offset) ((swp_entry_t) { ((type) << __SWP_TYPE_SHIFT) | ((offset) << __SWP_OFFSET_SHIFT) })
offset 값과 type 값으로 arm swap 엔트리를 구성한다.
__swp_type()
arch/arm/include/asm/pgtable.h
/*
 * Extract the type field of an arch-format swap entry: shift val right by
 * __SWP_TYPE_SHIFT and keep the bits selected by __SWP_TYPE_MASK.
 */
#define __swp_type(x) (((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK)
arm swap 엔트리에서 type 값을 반환한다.
__swp_offset()
arch/arm/include/asm/pgtable.h
/*
 * Extract the offset field of an arch-format swap entry: shift val right by
 * __SWP_OFFSET_SHIFT.  No mask is applied, so every bit above the shift is
 * returned.
 */
#define __swp_offset(x) ((x).val >> __SWP_OFFSET_SHIFT)
arm swap 엔트리에서 offset 값을 반환한다.
매크로 상수
arch/arm/include/asm/pgtable.h
/*
 * Encode and decode a swap entry.  Swap entries are stored in the Linux
 * page tables as follows:
 *
 *   3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
 *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
 *   <--------------- offset ------------------------> < type -> 0 0
 *
 * This gives us up to 31 swap files and 128GB per swap file.  Note that
 * the offset field is always non-zero.
 */
#define __SWP_TYPE_SHIFT	2	/* type field starts at bit 2 */
#define __SWP_TYPE_BITS		5	/* width of the type field */
#define __SWP_TYPE_MASK		((1 << __SWP_TYPE_BITS) - 1)
/* The offset field begins immediately above the type field. */
#define __SWP_OFFSET_SHIFT	(__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
ARM64 Swap 엔트리
__swp_entry()
arch/arm64/include/asm/pgtable.h
/*
 * Build an arm64-format swap entry: `type' is shifted left by
 * __SWP_TYPE_SHIFT, `offset' by __SWP_OFFSET_SHIFT, and the two are OR'ed
 * into the single member of a swp_entry_t.  Neither argument is masked here.
 */
#define __swp_entry(type,offset) ((swp_entry_t) { ((type) << __SWP_TYPE_SHIFT) | ((offset) << __SWP_OFFSET_SHIFT) })
offset 값과 type 값으로 arm64 swap 엔트리를 구성한다.
__swp_type()
arch/arm64/include/asm/pgtable.h
/*
 * Extract the type field of an arm64-format swap entry: shift val right by
 * __SWP_TYPE_SHIFT and keep the bits selected by __SWP_TYPE_MASK.
 */
#define __swp_type(x) (((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK)
arm64 swap 엔트리에서 type 값을 반환한다.
__swp_offset()
arch/arm64/include/asm/pgtable.h
/*
 * Extract the offset field of an arm64-format swap entry: shift val right
 * by __SWP_OFFSET_SHIFT and keep the bits selected by __SWP_OFFSET_MASK, so
 * bits above the offset field are not returned.
 */
#define __swp_offset(x) (((x).val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK)
arm64 swap 엔트리에서 50bit offset 값을 반환한다.
매크로 상수
arch/arm64/include/asm/pgtable.h
/*
 * Encode and decode a swap entry:
 *	bits 0-1:	present (must be zero)
 *	bits 2-7:	swap type
 *	bits 8-57:	swap offset
 *	bit  58:	PTE_PROT_NONE (must be zero)
 */
#define __SWP_TYPE_SHIFT	2	/* type field starts at bit 2 */
#define __SWP_TYPE_BITS		6	/* width of the type field */
#define __SWP_OFFSET_BITS	50	/* width of the offset field */
#define __SWP_TYPE_MASK		((1 << __SWP_TYPE_BITS) - 1)
/* The offset field begins immediately above the type field. */
#define __SWP_OFFSET_SHIFT	(__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
#define __SWP_OFFSET_MASK	((1UL << __SWP_OFFSET_BITS) - 1)
Swap PTE 엔트리 식별
is_swap_pte()
include/linux/swapops.h
/*
 * Check whether a pte points to a swap entry.
 *
 * Returns 1 when the pte is neither none nor present, 0 otherwise.
 */
static inline int is_swap_pte(pte_t pte)
{
	/* An empty pte holds nothing at all, so it cannot be a swap entry. */
	if (pte_none(pte))
		return 0;

	/* A present pte maps a page rather than a swap entry. */
	if (pte_present(pte))
		return 0;

	return 1;
}
swap된 pte 엔트리인지 여부를 반환한다.
- PTE가 NONE 설정이 아니면서 PRESENT 설정도 없는 경우가 swap 상태이다.
Swap 캐시
backing storage에 예약된 슬롯을 가진 공유 페이지는 swap 캐시로 간주된다. swap 캐시는 파일 캐시와 다음 2 가지가 다르다.
- page->mapping 이 &swapper_space[]를 사용한다. (address_space)
- add_to_page_cache() 대신 add_to_swap_cache() 함수를 사용한다.