Memory Model -4- (APIs)
for_each_migratetype_order()
include/linux/mmzone.h
/*
 * Iterate over every (order, migratetype) pair: the outer loop walks the
 * buddy allocation orders [0, MAX_ORDER), the inner loop walks the page
 * mobility types [0, MIGRATE_TYPES).
 */
#define for_each_migratetype_order(order, type) \
	for (order = 0; order < MAX_ORDER; order++) \
		for (type = 0; type < MIGRATE_TYPES; type++)
- buddy 메모리 할당자가 사용하는 MAX_ORDER(11) 수 만큼 루프를 돈다.
- 페이지 이동성(migrate type) 그룹 관리를 담당하는 MIGRATE_TYPES 만큼 루프를 돈다.
존 및 노드 관련
is_highmem_idx()
include/linux/mmzone.h
/*
 * Non-zero when the zone index refers to highmem: either ZONE_HIGHMEM
 * itself, or ZONE_MOVABLE when the movable zone was carved out of
 * highmem. Always 0 without CONFIG_HIGHMEM.
 */
static inline int is_highmem_idx(enum zone_type idx)
{
#ifdef CONFIG_HIGHMEM
	return (idx == ZONE_HIGHMEM ||
		(idx == ZONE_MOVABLE && movable_zone == ZONE_HIGHMEM));
#else
	return 0;
#endif
}
zone_idx()
include/linux/mmzone.h
/*
 * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc.
 */
#define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones)
- zone 인덱스 번호를 리턴한다.
- 예) ZONE_DMA, ZONE_NORMAL을 사용하는 경우 0과 1이 리턴된다.
- 예) ZONE_NORMAL만 사용하는 경우 0이 리턴된다.
set_page_links()
include/linux/mm.h
/* Record zone, node and (when sections live in page flags) section in page->flags. */
static inline void set_page_links(struct page *page, enum zone_type zone,
	unsigned long node, unsigned long pfn)
{
	set_page_zone(page, zone);
	set_page_node(page, node);
#ifdef SECTION_IN_PAGE_FLAGS
	set_page_section(page, pfn_to_section_nr(pfn));
#endif
}
- page->flags에 zone, node 및 section 정보를 설정한다.
set_page_zone()
include/linux/mm.h
/* Clear the old zone bits in page->flags, then store the new zone id. */
static inline void set_page_zone(struct page *page, enum zone_type zone)
{
	page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT);
	page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT;
}
- page->flags에 zone 정보를 설정한다.
set_page_node()
include/linux/mm.h
/* Clear the old node bits in page->flags, then store the new node id. */
static inline void set_page_node(struct page *page, unsigned long node)
{
	page->flags &= ~(NODES_MASK << NODES_PGSHIFT);
	page->flags |= (node & NODES_MASK) << NODES_PGSHIFT;
}
- page->flags에 노드 정보를 설정한다.
page_zone_id()
include/linux/mm.h
/*
 * The identification function is mainly used by the buddy allocator for
 * determining if two pages could be buddies. We are not really identifying
 * the zone since we could be using the section number id if we do not have
 * node id available in page flags.
 * We only guarantee that it will return the same value for two combinable
 * pages in a zone.
 */
static inline int page_zone_id(struct page *page)
{
	return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK;
}
페이지에서 zone id를 추출하여 반환한다.
Sparsemem 섹션 관련
set_page_section()
include/linux/mm.h
#ifdef SECTION_IN_PAGE_FLAGS
/* Store the sparsemem section number in page->flags. */
static inline void set_page_section(struct page *page, unsigned long section)
{
	page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT);
	page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT;
}
#endif
- page->flags에 섹션 정보를 설정한다.
__pfn_to_section()
include/linux/mmzone.h
/* Translate a pfn into the mem_section descriptor that covers it. */
static inline struct mem_section *__pfn_to_section(unsigned long pfn)
{
	return __nr_to_section(pfn_to_section_nr(pfn));
}
pfn 값에 대응하는 mem_section 구조체 정보를 리턴한다.
- pfn_to_section_nr()
- pfn 값으로 섹션 번호를 알아온다.
- __nr_to_section()
- 섹션 번호로 mem_section 구조체 정보를 리턴한다.
pfn_to_section_nr()
include/linux/mmzone.h
/* Sparsemem section number that the given pfn belongs to. */
#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT)
- pfn의 섹션(Sparse) 인덱스를 리턴한다.
- 예) Realview-PBX
- 섹션 사이즈가 256M 단위(PFN_SECTION_SHIFT=16)이므로 섹션 번호는 0~15까지의 결과
__nr_to_section()
include/linux/mmzone.h
/*
 * Look up the mem_section for a section number via the two-level
 * mem_section[root][offset] table; NULL when the root page is absent.
 */
static inline struct mem_section *__nr_to_section(unsigned long nr)
{
	if (!mem_section[SECTION_NR_TO_ROOT(nr)])
		return NULL;
	return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
}
- 섹션 번호로 mem_section 구조체 정보를 리턴한다.
SECTION_NR_TO_ROOT()
include/linux/mmzone.h
/* Root index of the two-level mem_section[][] table for a section number. */
#define SECTION_NR_TO_ROOT(sec) ((sec) / SECTIONS_PER_ROOT)
- 섹션 번호로 ROOT 번호를 리턴한다.
#ifdef CONFIG_SPARSEMEM_EXTREME
/* As many struct mem_section entries as fit in a single page. */
#define SECTIONS_PER_ROOT (PAGE_SIZE / sizeof (struct mem_section))
#else
#define SECTIONS_PER_ROOT 1
#endif
- ROOT 하나 당 섹션 수
- PAGE_SIZE(4K)에 mem_section 구조체가 들어갈 수 있는 수
present_section_nr()
include/linux/mmzone.h
/* Non-zero when the section with this number is present (i.e. not a hole). */
static inline int present_section_nr(unsigned long nr)
{
	return present_section(__nr_to_section(nr));
}
섹션 번호에 해당하는 mem_section이 준비되어 있는지 확인한다. 준비되어 있지 않은 경우 해당 섹션은 hole을 의미한다.
- __nr_to_section()
- 섹션 번호로 mem_section 구조체 정보를 알아온다.
- present_section()
- mem_section 구조체 정보에 섹션이 존재하는지 확인한다.
present_section()
include/linux/mmzone.h
/* A section is present when SECTION_MARKED_PRESENT is set in its map word. */
static inline int present_section(struct mem_section *section)
{
	return (section && (section->section_mem_map & SECTION_MARKED_PRESENT));
}
mem_section 구조체 정보에 섹션이 존재하는지 확인한다.
- SECTION_MARKED_PRESENT 식별 비트가 설정되어 있는지 확인한다.
__section_mem_map_addr()
include/linux/mmzone.h
/* Decode the struct page array (mem_map) pointer stored in a section. */
static inline struct page *__section_mem_map_addr(struct mem_section *section)
{
	unsigned long map = section->section_mem_map;

	/* Strip the low flag bits that share the word with the pointer. */
	map &= SECTION_MAP_MASK;
	return (struct page *)map;
}
해당 Sparse memory 섹션에 대한 mem_map 주소를 반환한다.
include/linux/mmzone.h
/*
 * We use the lower bits of the mem_map pointer to store
 * a little bit of information. There should be at least
 * 3 bits here due to 32-bit alignment.
 */
#define SECTION_MARKED_PRESENT	(1UL<<0)
#define SECTION_HAS_MEM_MAP	(1UL<<1)
#define SECTION_MAP_LAST_BIT	(1UL<<2)
#define SECTION_MAP_MASK	(~(SECTION_MAP_LAST_BIT-1))
#define SECTION_NID_SHIFT	2
페이지의 참조 사용 및 사용 해제
get_page()
include/linux/mm.h
/* Take one reference on the (head) page. */
static inline void get_page(struct page *page)
{
	page = compound_head(page);
	/*
	 * Getting a normal page or the head of a compound page
	 * requires to already have an elevated page->_refcount.
	 */
	VM_BUG_ON_PAGE(page_ref_count(page) <= 0, page);
	page_ref_inc(page);
}
참조 카운터를 1 증가시킨다.
get_page_unless_zero()
include/linux/mm.h
/*
 * Try to grab a ref unless the page has a refcount of zero, return false if
 * that is the case.
 * This can be called when MMU is off so it must not access
 * any of the virtual mappings.
 */
static inline int get_page_unless_zero(struct page *page)
{
	return page_ref_add_unless(page, 1, 0);
}
참조 카운터(p->_refcount)를 읽은 후 0 값과 다른 경우에 한해 증가시킨다. 결과 값이 0이 아니면 true를 반환한다.
put_page()
include/linux/mm.h
/* Drop one reference on the (head) page; free it when the count hits zero. */
static inline void put_page(struct page *page)
{
	page = compound_head(page);

	/*
	 * For devmap managed pages we need to catch refcount transition from
	 * 2 to 1, when refcount reach one it means the page is free and we
	 * need to inform the device driver through callback. See
	 * include/linux/memremap.h and HMM for details.
	 */
	if (put_devmap_managed_page(page))
		return;

	if (put_page_testzero(page))
		__put_page(page);
}
참조 카운터를 1 감소 시킨다. 만일 0이되면 페이지의 회수를 진행한다.
put_page_testzero()
include/linux/mm.h
/*
 * Methods to modify the page usage count.
 *
 * What counts for a page usage:
 * - cache mapping (page->mapping)
 * - private data (page->private)
 * - page mapped in a task's page tables, each mapping
 *   is counted separately
 *
 * Also, many kernel routines increase the page count before a critical
 * routine so they can be sure the page doesn't go away from under them.
 */

/*
 * Drop a ref, return true if the refcount fell to zero (the page has no users)
 */
static inline int put_page_testzero(struct page *page)
{
	VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
	return page_ref_dec_and_test(page);
}
페이지의 참조카운터를 감소시키고 0(사용완료)인지 확인하여 사용완료 여부를 반환한다.
- 0=사용중, 1=사용완료(참조 _count가 0이된 경우)
page vs pfn 변환
PFN과 page 구조체 포인터와의 변환은 다음 2개의 API를 사용한다.
include/asm-generic/memory_model.h
/* Both converters resolve to the memory-model specific helpers. */
#define page_to_pfn __page_to_pfn
#define pfn_to_page __pfn_to_page
- page_to_pfn()
- page 구조체 포인터로 pfn 값을 알아온다.
- pfn_to_page()
- pfn 값으로 page 구조체 포인터를 알아온다.
다음과 같이 flat 및 sparse 물리 메모리 모델에 따라 변환 방법이 달라지며, sparse 물리 모델의 경우 vmemmap 사용 여부에 따라 다시 2가지로 나뉜다.
CONFIG_FLATMEM
/* FLATMEM: a single mem_map[] indexed from the first DRAM pfn. */
#define __pfn_to_page(pfn) (mem_map + ((pfn) - ARCH_PFN_OFFSET))
#define __page_to_pfn(page) ((unsigned long)((page) - mem_map) + \
	ARCH_PFN_OFFSET)
- __pfn_to_page()
- ARCH_PFN_OFFSET은 물리 DRAM의 시작 PFN 값을 가리킨다.
- mem_map[@pfn – 물리 DRAM 시작 PFN]
CONFIG_SPARSEMEM
/*
 * Note: section's mem_map is encoded to reflect its start_pfn.
 * section[i].section_mem_map == mem_map's address - start_pfn;
 */
#define __page_to_pfn(pg)					\
({	const struct page *__pg = (pg);				\
	int __sec = page_to_section(__pg);			\
	(unsigned long)(__pg - __section_mem_map_addr(__nr_to_section(__sec)));	\
})

#define __pfn_to_page(pfn)				\
({	unsigned long __pfn = (pfn);			\
	struct mem_section *__sec = __pfn_to_section(__pfn);	\
	__section_mem_map_addr(__sec) + __pfn;		\
})
- __pfn_to_page()
- pfn을 섹션 단위로 바꾼 후 mem_section[][]에 접근하여 섹션에 대한 mem_map[@pfn] 주소를 반환한다.
CONFIG_SPARSEMEM & CONFIG_SPARSEMEM_VMEMMAP
/* memmap is virtually contiguous. */
#define __pfn_to_page(pfn) (vmemmap + (pfn))
#define __page_to_pfn(page) (unsigned long)((page) - vmemmap)
- __pfn_to_page()
- = mem_map[@pfn]
- vmemmap = mem_map[0]의 가상 주소가 저장되어 있다.
페이지 플래그
include/linux/page-flags.h
/* * Various page->flags bits: * * PG_reserved is set for special pages. The "struct page" of such a page * should in general not be touched (e.g. set dirty) except by its owner. * Pages marked as PG_reserved include: * - Pages part of the kernel image (including vDSO) and similar (e.g. BIOS, * initrd, HW tables) * - Pages reserved or allocated early during boot (before the page allocator * was initialized). This includes (depending on the architecture) the * initial vmemmap, initial page tables, crashkernel, elfcorehdr, and much * much more. Once (if ever) freed, PG_reserved is cleared and they will * be given to the page allocator. * - Pages falling into physical memory gaps - not IORESOURCE_SYSRAM. Trying * to read/write these pages might end badly. Don't touch! * - The zero page(s) * - Pages not added to the page allocator when onlining a section because * they were excluded via the online_page_callback() or because they are * PG_hwpoison. * - Pages allocated in the context of kexec/kdump (loaded kernel image, * control pages, vmcoreinfo) * - MMIO/DMA pages. Some architectures don't allow to ioremap pages that are * not marked PG_reserved (as they might be in use by somebody else who does * not respect the caching strategy). * - Pages part of an offline section (struct pages of offline sections should * not be trusted as they will be initialized when first onlined). * - MCA pages on ia64 * - Pages holding CPU notes for POWER Firmware Assisted Dump * - Device memory (e.g. PMEM, DAX, HMM) * Some PG_reserved pages will be excluded from the hibernation image. * PG_reserved does in general not hinder anybody from dumping or swapping * and is no longer required for remap_pfn_range(). ioremap might require it. * Consequently, PG_reserved for a page mapped into user space can indicate * the zero page, the vDSO, MMIO pages or device memory. 
* * The PG_private bitflag is set on pagecache pages if they contain filesystem * specific data (which is normally at page->private). It can be used by * private allocations for its own usage. * * During initiation of disk I/O, PG_locked is set. This bit is set before I/O * and cleared when writeback _starts_ or when read _completes_. PG_writeback * is set before writeback starts and cleared when it finishes. * * PG_locked also pins a page in pagecache, and blocks truncation of the file * while it is held. * * page_waitqueue(page) is a wait queue of all tasks waiting for the page * to become unlocked. * * PG_swapbacked is set when a page uses swap as a backing storage. This are * usually PageAnon or shmem pages but please note that even anonymous pages * might lose their PG_swapbacked flag when they simply can be dropped (e.g. as * a result of MADV_FREE). * * PG_uptodate tells whether the page's contents is valid. When a read * completes, the page becomes uptodate, unless a disk I/O error happened. * * PG_referenced, PG_reclaim are used for page reclaim for anonymous and * file-backed pagecache (see mm/vmscan.c). * * PG_error is set to indicate that an I/O error occurred on this page. * * PG_arch_1 is an architecture specific page state bit. The generic code * guarantees that this bit is cleared for a page when it first is entered into * the page cache. * * PG_hwpoison indicates that a page got corrupted in hardware and contains * data with incorrect ECC bits that triggered a machine check. Accessing is * not safe since it may cause another machine check. Don't touch! */
/* * Don't use the pageflags directly. Use the PageFoo macros. * * The page flags field is split into two parts, the main flags area * which extends from the low bits upwards, and the fields area which * extends from the high bits downwards. * * | FIELD | ... | FLAGS | * N-1 ^ 0 * (NR_PAGEFLAGS) * * The fields area is reserved for fields mapping zone, node (for NUMA) and * SPARSEMEM section (for variants of SPARSEMEM that require section ids like * SPARSEMEM_EXTREME with !SPARSEMEM_VMEMMAP).
enum pageflags {
	PG_locked,		/* Page is locked. Don't touch. */
	PG_referenced,
	PG_uptodate,
	PG_dirty,
	PG_lru,
	PG_active,
	PG_workingset,
	PG_waiters,		/* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */
	PG_error,
	PG_slab,
	PG_owner_priv_1,	/* Owner use. If pagecache, fs may use*/
	PG_arch_1,
	PG_reserved,
	PG_private,		/* If pagecache, has fs-private data */
	PG_private_2,		/* If pagecache, has fs aux data */
	PG_writeback,		/* Page is under writeback */
	PG_head,		/* A head page */
	PG_mappedtodisk,	/* Has blocks allocated on-disk */
	PG_reclaim,		/* To be reclaimed asap */
	PG_swapbacked,		/* Page is backed by RAM/swap */
	PG_unevictable,		/* Page is "unevictable" */
#ifdef CONFIG_MMU
	PG_mlocked,		/* Page is vma mlocked */
#endif
#ifdef CONFIG_ARCH_USES_PG_UNCACHED
	PG_uncached,		/* Page has been mapped as uncached */
#endif
#ifdef CONFIG_MEMORY_FAILURE
	PG_hwpoison,		/* hardware poisoned page. Don't touch */
#endif
#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT)
	PG_young,
	PG_idle,
#endif
#ifdef CONFIG_64BIT
	PG_arch_2,
#endif
#ifdef CONFIG_KASAN_HW_TAGS
	PG_skip_kasan_poison,
#endif
	__NR_PAGEFLAGS,

	/* Filesystems */
	PG_checked = PG_owner_priv_1,

	/* SwapBacked */
	PG_swapcache = PG_owner_priv_1,	/* Swap page: swp_entry_t in private */

	/* Two page bits are conscripted by FS-Cache to maintain local caching
	 * state. These bits are set on pages belonging to the netfs's inodes
	 * when those inodes are being locally cached.
	 */
	PG_fscache = PG_private_2,	/* page backed by cache */

	/* XEN */
	/* Pinned in Xen as a read-only pagetable page. */
	PG_pinned = PG_owner_priv_1,
	/* Pinned as part of domain save (see xen_mm_pin_all()). */
	PG_savepinned = PG_dirty,
	/* Has a grant mapping of another (foreign) domain's page. */
	PG_foreign = PG_owner_priv_1,
	/* Remapped by swiotlb-xen. */
	PG_xen_remapped = PG_owner_priv_1,

	/* SLOB */
	PG_slob_free = PG_private,

	/* Compound pages. Stored in first tail page's flags */
	PG_double_map = PG_workingset,

#ifdef CONFIG_MEMORY_FAILURE
	/*
	 * Compound pages. Stored in first tail page's flags.
	 * Indicates that at least one subpage is hwpoisoned in the
	 * THP.
	 */
	PG_has_hwpoisoned = PG_mappedtodisk,
#endif

	/* non-lru isolated movable page */
	PG_isolated = PG_reclaim,

	/* Only valid for buddy pages. Used to track pages that are reported */
	PG_reported = PG_uptodate,
};
page->flags에 기록되는 추가 정보
linux/page-flags-layout.h
/* * page->flags layout: * * There are five possibilities for how page->flags get laid out. The first * pair is for the normal case without sparsemem. The second pair is for * sparsemem when there is plenty of space for node and section information. * The last is when there is insufficient space in page->flags and a separate * lookup is necessary. * * No sparsemem or sparsemem vmemmap: | NODE | ZONE | ... | FLAGS | * " plus space for last_cpupid: | NODE | ZONE | LAST_CPUPID ... | FLAGS | * classic sparse with space for node:| SECTION | NODE | ZONE | ... | FLAGS | * " plus space for last_cpupid: | SECTION | NODE | ZONE | LAST_CPUPID ... | FLAGS | * classic sparse no space for node: | SECTION | ZONE | ... | FLAGS | */
커널 설정에 따라 page->flags에 플래그들 이외에 SECTION, NODE, ZONE 및 LAST_CPUPID 정보 등이 기록된다.
Reserved 플래그(예)
PageReserved(), SetPageReserved(), ClearPageReserved(), __ClearPageReserved()
include/linux/page-flags.h
/* Generates PageReserved(), SetPageReserved(), ClearPageReserved() and __ClearPageReserved(). */
PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
- PageReserved(), SetPageReserved(), ClearPageReserved() 및 __ClearPageReserved() static inline 함수가 만들어진다.
/* Generate the Page##uname(), SetPage##uname() and ClearPage##uname() accessors. */
#define PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \
	SETPAGEFLAG(uname, lname) CLEARPAGEFLAG(uname, lname)
- 아래 매크로를 사용하여 PageXXX(), SetPageXXX() 및 ClearPageXXX() static inline 함수가 만들어진다.
/*
 * Macros to create function definitions for page flags
 */
#define TESTPAGEFLAG(uname, lname)				\
static inline int Page##uname(const struct page *page)		\
	{ return test_bit(PG_##lname, &page->flags); }

#define SETPAGEFLAG(uname, lname)				\
static inline void SetPage##uname(struct page *page)		\
	{ set_bit(PG_##lname, &page->flags); }

#define CLEARPAGEFLAG(uname, lname)				\
static inline void ClearPage##uname(struct page *page)		\
	{ clear_bit(PG_##lname, &page->flags); }
/* Non-atomic variant of ClearPage##uname(). */
#define __CLEARPAGEFLAG(uname, lname)				\
static inline void __ClearPage##uname(struct page *page)	\
	{ __clear_bit(PG_##lname, &page->flags); }
- test_bit()
- &page->flags의 PG_xxxxx 번호 비트가 set되었는지 여부를 알아온다.
- set_bit()
- &page->flags의 PG_xxxxx 번호 비트를 atomic하게 set 한다.
- clear_bit()
- &page->flags의 PG_xxxxx 번호 비트를 atomic하게 clear 한다.
- __clear_bit()
- &page->flags의 PG_xxxxx 번호 비트를 clear 한다. (non-atomic)
일부 플래그의 재편성 (p->page_type)
아래 4개의 PG_buddy, PG_balloon, PG_kmemcg, PG_table 플래그는 p->_mapcount와 같이 사용하는 것으로 바뀌었고, 다시 유니언 선언하여 공유된 p->page_type을 사용한다.
- 최초 p->flags에서 관리되던 플래그들이 p->_mapcount로 분리되었었다.
- 이후 새 커널에서는 p->_mapcount 대신 유니온으로 공유된 p->page_type를 사용한다. 단 p->_mapcount의 초기 값이 -1(0xffff_ffff)이므로 비트의 설정과 해제는 반대로 사용한다.
- 예) Set Buddy
- old 커널: p->_mapcount = PAGE_BUDDY_MAPCOUNT_VALUE(-128)
- new 커널: p->page_type &= ~0x80
- 예) Clear Buddy
- old 커널: p->_mapcount = -1 (초기 값으로 되돌린다)
- new 커널: p->page_type |= 0x80
- 참고: mm: split page_type out from _mapcount
- 예) Set Buddy
include/linux/page-flags.h
/*
 * PageBuddy() indicates that the page is free and in the buddy system
 * (see mm/page_alloc.c).
 */
PAGE_TYPE_OPS(Buddy, buddy)

/*
 * PageBalloon() is true for pages that are on the balloon page list
 * (see mm/balloon_compaction.c).
 */
PAGE_TYPE_OPS(Balloon, balloon)

/*
 * If kmemcg is enabled, the buddy allocator will set PageKmemcg() on
 * pages allocated with __GFP_ACCOUNT. It gets cleared on page free.
 */
PAGE_TYPE_OPS(Kmemcg, kmemcg)

/*
 * Marks pages in use as page tables.
 */
PAGE_TYPE_OPS(Table, table)
/* * For pages that are never mapped to userspace (and aren't PageSlab), * page_type may be used. Because it is initialised to -1, we invert the * sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and * __ClearPageFoo *sets* the bit used for PageFoo. We reserve a few high and * low bits so that an underflow or overflow of page_mapcount() won't be * mistaken for a page type value. */
#define PAGE_TYPE_BASE 0xf0000000 /* Reserve 0x0000007f to catch underflows of page_mapcount */ #define PAGE_MAPCOUNT_RESERVE -128 #define PG_buddy 0x00000080 #define PG_balloon 0x00000100 #define PG_kmemcg 0x00000200 #define PG_table 0x00000400 #define PageType(page, flag) \ ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) static inline int page_has_type(struct page *page) { return (int)page->page_type < PAGE_MAPCOUNT_RESERVE; } #define PAGE_TYPE_OPS(uname, lname) \ static __always_inline int Page##uname(struct page *page) \ { \ return PageType(page, PG_##lname); \ } \ static __always_inline void __SetPage##uname(struct page *page) \ { \ VM_BUG_ON_PAGE(!PageType(page, 0), page); \ page->page_type &= ~PG_##lname; \ } \ static __always_inline void __ClearPage##uname(struct page *page) \ { \ VM_BUG_ON_PAGE(!Page##uname(page), page); \ page->page_type |= PG_##lname; \ }
위의 매크로를 통해 PageBuddy(), __SetPageBuddy(), __ClearPageBuddy() 등의 인라인 함수가 생성된다.
페이지 블럭 관련
set_pageblock_flags_group()
linux/pageblock-flags.h
/* Convenience wrapper: build the bit mask from a [start, end] bit range. */
#define set_pageblock_flags_group(page, flags, start_bitidx, end_bitidx) \
	set_pfnblock_flags_mask(page, flags, page_to_pfn(page),		\
			end_bitidx,					\
			(1 << (end_bitidx - start_bitidx + 1)) - 1)
set_pfnblock_flags_mask()
mm/page_alloc.c
/** * set_pfnblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages * @page: The page within the block of interest * @flags: The flags to set * @pfn: The target page frame number * @end_bitidx: The last bit of interest * @mask: mask of bits that the caller is interested in */ void set_pfnblock_flags_mask(struct page *page, unsigned long flags, unsigned long pfn, unsigned long end_bitidx, unsigned long mask) { struct zone *zone; unsigned long *bitmap; unsigned long bitidx, word_bitidx; unsigned long old_word, word; BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4); zone = page_zone(page); bitmap = get_pageblock_bitmap(zone, pfn); bitidx = pfn_to_bitidx(zone, pfn); word_bitidx = bitidx / BITS_PER_LONG; bitidx &= (BITS_PER_LONG-1); VM_BUG_ON_PAGE(!zone_spans_pfn(zone, pfn), page); bitidx += end_bitidx; mask <<= (BITS_PER_LONG - bitidx - 1); flags <<= (BITS_PER_LONG - bitidx - 1); word = ACCESS_ONCE(bitmap[word_bitidx]); for (;;) { old_word = cmpxchg(&bitmap[word_bitidx], word, (word & ~mask) | flags); if (word == old_word) break; word = old_word; } }
get_pfnblock_flags_mask()
mm/page_alloc.c
/** * get_pfnblock_flags_mask - Return the requested group of flags for the pageblock_nr_pages block of pages * @page: The page within the block of interest * @pfn: The target page frame number * @end_bitidx: The last bit of interest to retrieve * @mask: mask of bits that the caller is interested in * * Return: pageblock_bits flags */ unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn, unsigned long end_bitidx, unsigned long mask) { struct zone *zone; unsigned long *bitmap; unsigned long bitidx, word_bitidx; unsigned long word; zone = page_zone(page); bitmap = get_pageblock_bitmap(zone, pfn); bitidx = pfn_to_bitidx(zone, pfn); word_bitidx = bitidx / BITS_PER_LONG; bitidx &= (BITS_PER_LONG-1); word = bitmap[word_bitidx]; bitidx += end_bitidx; return (word >> (BITS_PER_LONG - bitidx - 1)) & mask; }
get_pageblock_bitmap()
mm/page_alloc.c
/* Return a pointer to the bitmap storing bits affecting a block of pages */
static inline unsigned long *get_pageblock_bitmap(struct zone *zone,
					unsigned long pfn)
{
#ifdef CONFIG_SPARSEMEM
	return __pfn_to_section(pfn)->pageblock_flags;
#else
	return zone->pageblock_flags;
#endif /* CONFIG_SPARSEMEM */
}
@pfn이 포함된 페이지 블럭 비트맵을 반환한다. (usemap)
- usemap에는 4비트로 표현된 mobility 플래그들이 저장된다.
pfn_to_bitidx()
mm/page_alloc.c
/* Bit index of the pfn's pageblock inside its usemap bitmap. */
static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn)
{
#ifdef CONFIG_SPARSEMEM
	pfn &= (PAGES_PER_SECTION-1);
	return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
#else
	pfn = pfn - round_down(zone->zone_start_pfn, pageblock_nr_pages);
	return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
#endif /* CONFIG_SPARSEMEM */
}
pfn에 대한 pageblock에서 비트 인덱스를 반환한다.
SECTION_BLOCKFLAGS_BITS
include/linux/mmzone.h
/* Number of pageblock flag bits needed per sparsemem section. */
#define SECTION_BLOCKFLAGS_BITS \
	((1UL << (PFN_SECTION_SHIFT - pageblock_order)) * NR_PAGEBLOCK_BITS)
섹션 당 pageblock 비트 수
- NR_PAGEBLOCK_BITS
- pageblock에 대해 필요한 비트 수=4
- PFN_SECTION_SHIFT
- 섹션 길이 표현에 필요한 비트 수 – 페이지 길이 표현에 필요한 비트 수를 뺀 값
- arm64: 섹션 길이=27(128M 표현) bits – 12(4KB 표현) bits = 15
- 예) arm64에서 섹션 크기=128M, pageblock_order=9인 경우
- SECTION_BLOCKFLAGS_BITS = 2^(15-9) * 4 bits = 256 bits
참고
- Memory Model -1- (Basic) | 문c
- Memory Model -2- (mem_map) | 문c
- Memory Model -3- (Sparse Memory) | 문c
- Memory Model -4- (APIs) | 문c – 현재 글
- ZONE 타입 | 문c
- bootmem_init | 문c
- zone_sizes_init() | 문c
- NUMA -1- (ARM64 초기화) | 문c
- build_all_zonelists() | 문c
- An introduction to compound pages | LWN.net
- pagemap, from the userspace perspective | kernel.org
- Documentation/vm/hugetlbpage.txt | kernel.org