Memory Model -4- (APIs)

Memory Model -4- (APIs)

 

for_each_migratetype_order()

include/linux/mmzone.h

/*
 * Iterate over every (order, type) pair: order runs from 0 up to, but not
 * including, MAX_ORDER, and for each order type runs from 0 up to, but not
 * including, MIGRATE_TYPES. The statement that follows the macro becomes
 * the body of the inner loop.
 */
#define for_each_migratetype_order(order, type) \
        for (order = 0; order < MAX_ORDER; order++) \
                for (type = 0; type < MIGRATE_TYPES; type++)
  • buddy 메모리 할당자가 사용하는 MAX_ORDER(11)  수 만큼 루프를 돈다.
  • 각 order마다 페이지 이주(migrate) 타입의 수인 MIGRATE_TYPES 만큼 루프를 돈다.

 


존 및 노드 관련

is_highmem_idx()

include/linux/mmzone.h

/*
 * Tell whether zone index @idx refers to high memory.
 *
 * Without CONFIG_HIGHMEM the answer is always 0. With it, the result is 1
 * for ZONE_HIGHMEM itself, and for ZONE_MOVABLE when movable_zone is
 * ZONE_HIGHMEM; otherwise 0.
 */
static inline int is_highmem_idx(enum zone_type idx)
{
#ifndef CONFIG_HIGHMEM
        return 0;
#else
        if (idx == ZONE_HIGHMEM)
                return 1;

        return idx == ZONE_MOVABLE && movable_zone == ZONE_HIGHMEM;
#endif
}

 

zone_idx()

include/linux/mmzone.h

/*
 * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc.
 *
 * The index is the pointer difference between @zone and the start of the
 * node_zones array reached through zone->zone_pgdat, i.e. the position of
 * the zone within that array.
 */
#define zone_idx(zone)          ((zone) - (zone)->zone_pgdat->node_zones)
  • zone 인덱스 번호를 리턴한다.
    • 예) ZONE_DMA, ZONE_NORMAL을 사용하는 경우 0과 1이 리턴된다.
    • 예) ZONE_NORMAL만 사용하는 경우 0이 리턴된다.

 

set_page_links()

include/linux/mm.h

/*
 * Record the zone and node for @page and, when SECTION_IN_PAGE_FLAGS is
 * defined, the section number derived from @pfn, by calling
 * set_page_zone(), set_page_node() and set_page_section() in that order.
 */
static inline void set_page_links(struct page *page, enum zone_type zone,
                                  unsigned long node, unsigned long pfn)
{
        set_page_zone(page, zone);
        set_page_node(page, node);
#if defined(SECTION_IN_PAGE_FLAGS)
        {
                unsigned long section_nr = pfn_to_section_nr(pfn);

                set_page_section(page, section_nr);
        }
#endif
}
  • page->flags에 zone, node 및 section 정보를 설정한다.

 

set_page_zone()

include/linux/mm.h

/*
 * Store @zone in the zone field of page->flags: the field located by
 * ZONES_MASK << ZONES_PGSHIFT is cleared and then filled with @zone
 * (truncated to ZONES_MASK). All other bits of page->flags are preserved.
 */
static inline void set_page_zone(struct page *page, enum zone_type zone)
{
        page->flags = (page->flags & ~(ZONES_MASK << ZONES_PGSHIFT)) |
                      ((zone & ZONES_MASK) << ZONES_PGSHIFT);
}
  • page->flags에 zone 정보를 설정한다.

 

set_page_node()

include/linux/mm.h

/*
 * Store @node in the node field of page->flags: the field located by
 * NODES_MASK << NODES_PGSHIFT is cleared and then filled with @node
 * (truncated to NODES_MASK). All other bits of page->flags are preserved.
 */
static inline void set_page_node(struct page *page, unsigned long node)
{
        page->flags = (page->flags & ~(NODES_MASK << NODES_PGSHIFT)) |
                      ((node & NODES_MASK) << NODES_PGSHIFT);
}
  • page->flags에 노드 정보를 설정한다.

 

page_zone_id()

include/linux/mm.h

/*
 * Return the identifier the buddy allocator compares to decide whether two
 * pages could be buddies. It is not necessarily a real zone id: when the
 * node id is not stored in page->flags the section number may be used
 * instead. The only guarantee is that two combinable pages in the same
 * zone yield the same value.
 *
 * The value is page->flags shifted right by ZONEID_PGSHIFT and masked
 * with ZONEID_MASK.
 */
static inline int page_zone_id(struct page *page)
{
        return ZONEID_MASK & (page->flags >> ZONEID_PGSHIFT);
}

페이지에서 zone id를 추출하여 반환한다.

 


Sparsemem 섹션 관련

set_page_section()

include/linux/mm.h

#if defined(SECTION_IN_PAGE_FLAGS)
/*
 * Store @section in the section field of page->flags: the field located by
 * SECTIONS_MASK << SECTIONS_PGSHIFT is cleared and then filled with
 * @section (truncated to SECTIONS_MASK). Only built when
 * SECTION_IN_PAGE_FLAGS is defined.
 */
static inline void set_page_section(struct page *page, unsigned long section)
{
        page->flags = (page->flags & ~(SECTIONS_MASK << SECTIONS_PGSHIFT)) |
                      ((section & SECTIONS_MASK) << SECTIONS_PGSHIFT);
}
#endif
  • page->flags에 섹션 정보를 설정한다.

 

__pfn_to_section()

include/linux/mmzone.h

/*
 * Look up the mem_section for @pfn: convert the pfn to its section number
 * with pfn_to_section_nr(), then resolve that number with
 * __nr_to_section() and return whatever it returns.
 */
static inline struct mem_section *__pfn_to_section(unsigned long pfn)
{
        unsigned long section_nr = pfn_to_section_nr(pfn);

        return __nr_to_section(section_nr);
}

pfn 값에 대응하는 mem_section 구조체 정보를 리턴한다.

  •  pfn_to_section_nr()
    • pfn 값으로 섹션 번호를 알아온다.
  • __nr_to_section()
    • 섹션 번호로 mem_section 구조체 정보를 리턴한다.

 

pfn_to_section_nr()

include/linux/mmzone.h

/* Section number of @pfn: the pfn shifted right by PFN_SECTION_SHIFT bits. */
#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT)
  • pfn의 섹션(Sparse) 인덱스를 리턴한다.
    • 예) Realview-PBX
      • 섹션 사이즈가 256M 단위(PFN_SECTION_SHIFT=16)이므로 섹션 번호는 0~15까지의 결과

 

__nr_to_section()

include/linux/mmzone.h

/*
 * Resolve section number @nr to its struct mem_section.
 *
 * mem_section is indexed in two steps: SECTION_NR_TO_ROOT(nr) selects the
 * root, and nr & SECTION_ROOT_MASK selects the entry inside that root.
 * Returns NULL when the root entry is empty.
 */
static inline struct mem_section *__nr_to_section(unsigned long nr)
{
        unsigned long root = SECTION_NR_TO_ROOT(nr);

        if (mem_section[root])
                return &mem_section[root][nr & SECTION_ROOT_MASK];

        return NULL;
}
  • 섹션 번호로 mem_section 구조체 정보를 리턴한다.

 

SECTION_NR_TO_ROOT()

include/linux/mmzone.h

/* Root index for section number @sec: sec divided by SECTIONS_PER_ROOT. */
#define SECTION_NR_TO_ROOT(sec) ((sec) / SECTIONS_PER_ROOT)
  • 섹션 번호로 ROOT 번호를 리턴한다.

 

/*
 * Number of sections per root: with CONFIG_SPARSEMEM_EXTREME, as many
 * struct mem_section entries as fit in PAGE_SIZE bytes; otherwise 1.
 */
#ifdef CONFIG_SPARSEMEM_EXTREME
#define SECTIONS_PER_ROOT       (PAGE_SIZE / sizeof (struct mem_section))
#else
#define SECTIONS_PER_ROOT       1
#endif
  • ROOT 하나 당 섹션 수
    • PAGE_SIZE(4K)에 mem_section 구조체가 들어갈 수 있는 수

 

present_section_nr()

include/linux/mmzone.h

/*
 * Report whether the section with number @nr is present: the section is
 * looked up with __nr_to_section() and the result is handed to
 * present_section(), whose return value is passed through.
 */
static inline int present_section_nr(unsigned long nr)
{
        struct mem_section *ms = __nr_to_section(nr);

        return present_section(ms);
}

섹션 번호에 해당하는 mem_section이 준비되어 있는지 확인한다. 준비되어 있지 않은 경우 해당 섹션은 hole을 의미한다.

  • __nr_to_section()
    • 섹션 번호로 mem_section 구조체 정보를 알아온다.
  • present_section()
    • mem_section 구조체 정보에 섹션이 존재하는지 확인한다.

 

present_section()

include/linux/mmzone.h

/*
 * Return 1 when @section is non-NULL and has the SECTION_MARKED_PRESENT
 * bit set in section_mem_map, 0 otherwise.
 */
static inline int present_section(struct mem_section *section)
{
        if (!section)
                return 0;

        return (section->section_mem_map & SECTION_MARKED_PRESENT) != 0;
}

mem_section 구조체 정보에 섹션이 존재하는지 확인한다.

  • SECTION_MARKED_PRESENT 식별 비트가 설정되어 있는지 확인한다.

 

__section_mem_map_addr()

include/linux/mmzone.h

/*
 * Return the struct page pointer stored in section->section_mem_map.
 * The value is masked with SECTION_MAP_MASK before being converted to a
 * pointer, so bits outside that mask are dropped.
 */
static inline struct page *__section_mem_map_addr(struct mem_section *section)
{
        return (struct page *)(section->section_mem_map & SECTION_MAP_MASK);
}

해당 Sparse memory 섹션에 대한 mem_map 주소를 반환한다.

include/linux/mmzone.h

/*
 * We use the lower bits of the mem_map pointer to store
 * a little bit of information.  There should be at least
 * 3 bits here due to 32-bit alignment.
 */
/* Bit 0. */
#define SECTION_MARKED_PRESENT  (1UL<<0)
/* Bit 1. NOTE(review): meaning inferred from the name only - confirm. */
#define SECTION_HAS_MEM_MAP     (1UL<<1)
/* First bit above the flag bits; used only to build SECTION_MAP_MASK. */
#define SECTION_MAP_LAST_BIT    (1UL<<2)
/* Mask that clears every bit below SECTION_MAP_LAST_BIT (bits 0 and 1). */
#define SECTION_MAP_MASK        (~(SECTION_MAP_LAST_BIT-1))
/* NOTE(review): presumably the shift of a node id kept in the same word - confirm. */
#define SECTION_NID_SHIFT       2

 


페이지의 참조 사용 및 사용 해제

get_page()

include/linux/mm.h

/*
 * Take one additional reference on @page. @page is first replaced by
 * compound_head(page), so the count that is checked and incremented is
 * the one on the page that compound_head() returns.
 */
static inline void get_page(struct page *page)
{
        page = compound_head(page);
        /*
         * Getting a normal page or the head of a compound page
         * requires to already have an elevated page->_refcount.
         */
        VM_BUG_ON_PAGE(page_ref_count(page) <= 0, page);
        page_ref_inc(page);
}

참조 카운터를 1 증가시킨다.

 

get_page_unless_zero()

include/linux/mm.h

/*
 * Try to grab a ref unless the page has a refcount of zero, return false if
 * that is the case.
 * This can be called when MMU is off so it must not access
 * any of the virtual mappings.
 *
 * Implemented as page_ref_add_unless(page, 1, 0), whose result is returned
 * unchanged. NOTE(review): the reading "add 1 unless the count equals 0"
 * follows the description above; confirm against page_ref_add_unless().
 */
static inline int get_page_unless_zero(struct page *page)
{
        return page_ref_add_unless(page, 1, 0);
}

참조 카운터(p->_refcount)가 0이 아닌 경우에 한해 1 증가시킨다. 증가에 성공한 경우 true를 반환하고, 참조 카운터가 0이어서 증가시키지 못한 경우 false를 반환한다.

 

put_page()

include/linux/mm.h

/*
 * Drop one reference on @page (after replacing it with
 * compound_head(page)). If put_devmap_managed_page() returns non-zero the
 * function returns right away; otherwise the reference is dropped with
 * put_page_testzero() and __put_page() is called when that reports the
 * count reached zero.
 */
static inline void put_page(struct page *page)
{
        page = compound_head(page);

        /*
         * For devmap managed pages we need to catch refcount transition from
         * 2 to 1, when refcount reach one it means the page is free and we
         * need to inform the device driver through callback. See
         * include/linux/memremap.h and HMM for details.
         */
        if (put_devmap_managed_page(page))
                return;

        if (put_page_testzero(page))
                __put_page(page);
}

참조 카운터를 1 감소 시킨다. 만일 0이되면 페이지의 회수를 진행한다.

 

put_page_testzero()

include/linux/mm.h

/*
 * Methods to modify the page usage count.
 *
 * What counts for a page usage:
 * - cache mapping   (page->mapping)
 * - private data    (page->private)
 * - page mapped in a task's page tables, each mapping
 *   is counted separately
 *
 * Also, many kernel routines increase the page count before a critical
 * routine so they can be sure the page doesn't go away from under them.
 */

/*
 * Drop a ref, return true if the refcount fell to zero (the page has no users)
 */
static inline int put_page_testzero(struct page *page)
{
        /* Dropping a reference when the count is already zero is a bug. */
        VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
        return page_ref_dec_and_test(page);
}

페이지의 참조카운터를 감소시키고 0(사용완료)인지 확인하여 사용완료 여부를 반환한다.

  • 0=사용중, 1=사용완료(참조 카운터 _refcount가 0이 된 경우)

 


page vs pfn 변환

PFN과 page 구조체 포인터와의 변환은 다음 2개의 API를 사용한다.

include/asm-generic/memory_model.h

/* The generic names are aliases for the __page_to_pfn / __pfn_to_page macros. */
#define page_to_pfn __page_to_pfn
#define pfn_to_page __pfn_to_page
  • page_to_pfn()
    • page 구조체 포인터로 pfn 값을 알아온다.
  • pfn_to_page()
    • pfn 값으로 page 구조체 포인터를 알아온다.

 

다음과 같이 flat 및 sparse 물리 메모리 모델에 따라 변환 방법이 달라지며, sparse 물리 메모리 모델의 경우 vmemmap 사용 여부에 따라 다시 2가지로 나뉜다.

CONFIG_FLATMEM
/*
 * mem_map is indexed by pfn relative to ARCH_PFN_OFFSET: the offset is
 * subtracted to go from pfn to page, and added back to go from page to pfn.
 */
#define __pfn_to_page(pfn)      (mem_map + ((pfn) - ARCH_PFN_OFFSET))
#define __page_to_pfn(page)     ((unsigned long)((page) - mem_map) + \
                                 ARCH_PFN_OFFSET)
  • __pfn_to_page()
    • ARCH_PFN_OFFSET은 물리 DRAM의 시작 PFN 값을 가리킨다.
    • mem_map[@pfn – 물리 DRAM 시작 PFN]

 

CONFIG_SPARSEMEM
/*
 * Note: section's mem_map is encoded to reflect its start_pfn.
 * section[i].section_mem_map == mem_map's address - start_pfn;
 */
/*
 * page -> pfn: find the page's section with page_to_section(), fetch that
 * section's encoded mem_map base with __section_mem_map_addr(), and take
 * the pointer difference between the page and that base.
 */
#define __page_to_pfn(pg)                                       \
({      const struct page *__pg = (pg);                         \
        int __sec = page_to_section(__pg);                      \
        (unsigned long)(__pg - __section_mem_map_addr(__nr_to_section(__sec))); \
})      

/*
 * pfn -> page: find the pfn's section with __pfn_to_section() and add the
 * pfn to that section's encoded mem_map base.
 */
#define __pfn_to_page(pfn)                              \
({      unsigned long __pfn = (pfn);                    \
        struct mem_section *__sec = __pfn_to_section(__pfn);    \
        __section_mem_map_addr(__sec) + __pfn;          \
})
  • __pfn_to_page()
    • pfn을 섹션 단위로 바꾼 후 mem_section[][]에 접근하여 섹션에 대한 mem_map[@pfn] 주소를 반환한다.

 

CONFIG_SPARSEMEM & CONFIG_SPARSEMEM_VMEMMAP
/*
 * memmap is virtually contiguous.
 * Both conversions are plain pointer arithmetic on vmemmap: add the pfn to
 * get the page, subtract vmemmap from the page to get the pfn.
 */
#define __pfn_to_page(pfn)      (vmemmap + (pfn))
#define __page_to_pfn(page)     (unsigned long)((page) - vmemmap)
  • __pfn_to_page()
    • = mem_map[@pfn]
    • vmemmap = mem_map[0]의 가상 주소가 저장되어 있다.

 


페이지 플래그

include/linux/page-flags.h

/*
 * Various page->flags bits:
 *
 * PG_reserved is set for special pages. The "struct page" of such a page
 * should in general not be touched (e.g. set dirty) except by its owner.
 * Pages marked as PG_reserved include:
 * - Pages part of the kernel image (including vDSO) and similar (e.g. BIOS,
 *   initrd, HW tables)
 * - Pages reserved or allocated early during boot (before the page allocator
 *   was initialized). This includes (depending on the architecture) the
 *   initial vmemmap, initial page tables, crashkernel, elfcorehdr, and much
 *   much more. Once (if ever) freed, PG_reserved is cleared and they will
 *   be given to the page allocator.
 * - Pages falling into physical memory gaps - not IORESOURCE_SYSRAM. Trying
 *   to read/write these pages might end badly. Don't touch!
 * - The zero page(s)
 * - Pages not added to the page allocator when onlining a section because
 *   they were excluded via the online_page_callback() or because they are
 *   PG_hwpoison.
 * - Pages allocated in the context of kexec/kdump (loaded kernel image,
 *   control pages, vmcoreinfo)
 * - MMIO/DMA pages. Some architectures don't allow to ioremap pages that are
 *   not marked PG_reserved (as they might be in use by somebody else who does
 *   not respect the caching strategy).
 * - Pages part of an offline section (struct pages of offline sections should
 *   not be trusted as they will be initialized when first onlined).
 * - MCA pages on ia64
 * - Pages holding CPU notes for POWER Firmware Assisted Dump
 * - Device memory (e.g. PMEM, DAX, HMM)
 * Some PG_reserved pages will be excluded from the hibernation image.
 * PG_reserved does in general not hinder anybody from dumping or swapping
 * and is no longer required for remap_pfn_range(). ioremap might require it.
 * Consequently, PG_reserved for a page mapped into user space can indicate
 * the zero page, the vDSO, MMIO pages or device memory.
 *
 * The PG_private bitflag is set on pagecache pages if they contain filesystem
 * specific data (which is normally at page->private). It can be used by
 * private allocations for its own usage.
 *
 * During initiation of disk I/O, PG_locked is set. This bit is set before I/O
 * and cleared when writeback _starts_ or when read _completes_. PG_writeback
 * is set before writeback starts and cleared when it finishes.
 *
 * PG_locked also pins a page in pagecache, and blocks truncation of the file
 * while it is held.
 *
 * page_waitqueue(page) is a wait queue of all tasks waiting for the page
 * to become unlocked.
 *
 * PG_swapbacked is set when a page uses swap as a backing storage.  This are
 * usually PageAnon or shmem pages but please note that even anonymous pages
 * might lose their PG_swapbacked flag when they simply can be dropped (e.g. as
 * a result of MADV_FREE).
 *
 * PG_uptodate tells whether the page's contents is valid.  When a read
 * completes, the page becomes uptodate, unless a disk I/O error happened.
 *
 * PG_referenced, PG_reclaim are used for page reclaim for anonymous and
 * file-backed pagecache (see mm/vmscan.c).
 *
 * PG_error is set to indicate that an I/O error occurred on this page.
 *
 * PG_arch_1 is an architecture specific page state bit.  The generic code
 * guarantees that this bit is cleared for a page when it first is entered into
 * the page cache.
 *
 * PG_hwpoison indicates that a page got corrupted in hardware and contains
 * data with incorrect ECC bits that triggered a machine check. Accessing is
 * not safe since it may cause another machine check. Don't touch!
 */
/*
 * Don't use the pageflags directly.  Use the PageFoo macros.
 *
 * The page flags field is split into two parts, the main flags area
 * which extends from the low bits upwards, and the fields area which
 * extends from the high bits downwards.
 *
 *  | FIELD | ... | FLAGS |
 *  N-1           ^       0
 *               (NR_PAGEFLAGS)
 *
 * The fields area is reserved for fields mapping zone, node (for NUMA) and
 * SPARSEMEM section (for variants of SPARSEMEM that require section ids like
 * SPARSEMEM_EXTREME with !SPARSEMEM_VMEMMAP).
 */
/*
 * Bit numbers of the individual flags in page->flags. Enumerators up to
 * __NR_PAGEFLAGS each get their own bit (some only under the listed config
 * options); the enumerators after __NR_PAGEFLAGS are aliases that reuse an
 * existing bit for a specific user.
 */
enum pageflags {
        PG_locked,              /* Page is locked. Don't touch. */
        PG_referenced,          /* Used by page reclaim (see mm/vmscan.c) */
        PG_uptodate,            /* Page contents are valid */
        PG_dirty,
        PG_lru,
        PG_active,
        PG_workingset,
        PG_waiters,             /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */
        PG_error,               /* An I/O error occurred on this page */
        PG_slab,
        PG_owner_priv_1,        /* Owner use. If pagecache, fs may use*/
        PG_arch_1,              /* Architecture specific page state bit */
        PG_reserved,            /* Special page; only its owner should touch it */
        PG_private,             /* If pagecache, has fs-private data */
        PG_private_2,           /* If pagecache, has fs aux data */
        PG_writeback,           /* Page is under writeback */
        PG_head,                /* A head page */
        PG_mappedtodisk,        /* Has blocks allocated on-disk */
        PG_reclaim,             /* To be reclaimed asap */
        PG_swapbacked,          /* Page is backed by RAM/swap */
        PG_unevictable,         /* Page is "unevictable"  */
#ifdef CONFIG_MMU
        PG_mlocked,             /* Page is vma mlocked */
#endif
#ifdef CONFIG_ARCH_USES_PG_UNCACHED
        PG_uncached,            /* Page has been mapped as uncached */
#endif
#ifdef CONFIG_MEMORY_FAILURE
        PG_hwpoison,            /* hardware poisoned page. Don't touch */
#endif
#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT)
        PG_young,
        PG_idle,
#endif
#ifdef CONFIG_64BIT
        PG_arch_2,
#endif
#ifdef CONFIG_KASAN_HW_TAGS
        PG_skip_kasan_poison,
#endif
        __NR_PAGEFLAGS,         /* Number of flag bits defined above */

        /* Filesystems */
        PG_checked = PG_owner_priv_1,

        /* SwapBacked */
        PG_swapcache = PG_owner_priv_1, /* Swap page: swp_entry_t in private */

        /* Two page bits are conscripted by FS-Cache to maintain local caching
         * state.  These bits are set on pages belonging to the netfs's inodes
         * when those inodes are being locally cached.
         */
        PG_fscache = PG_private_2,      /* page backed by cache */

        /* XEN */
        /* Pinned in Xen as a read-only pagetable page. */
        PG_pinned = PG_owner_priv_1,
        /* Pinned as part of domain save (see xen_mm_pin_all()). */
        PG_savepinned = PG_dirty,
        /* Has a grant mapping of another (foreign) domain's page. */
        PG_foreign = PG_owner_priv_1,
        /* Remapped by swiotlb-xen. */
        PG_xen_remapped = PG_owner_priv_1,

        /* SLOB */
        PG_slob_free = PG_private,

        /* Compound pages. Stored in first tail page's flags */
        PG_double_map = PG_workingset,

#ifdef CONFIG_MEMORY_FAILURE
        /*
         * Compound pages. Stored in first tail page's flags.
         * Indicates that at least one subpage is hwpoisoned in the
         * THP.
         */
        PG_has_hwpoisoned = PG_mappedtodisk,
#endif

        /* non-lru isolated movable page */
        PG_isolated = PG_reclaim,

        /* Only valid for buddy pages. Used to track pages that are reported */
        PG_reported = PG_uptodate,
};

 

page->flags에 기록되는 추가 정보

linux/page-flags-layout.h

/*
 * page->flags layout:
 *
 * There are five possibilities for how page->flags get laid out.  The first
 * pair is for the normal case without sparsemem. The second pair is for
 * sparsemem when there is plenty of space for node and section information.
 * The last is when there is insufficient space in page->flags and a separate
 * lookup is necessary.
 *
 * No sparsemem or sparsemem vmemmap: |       NODE     | ZONE |             ... | FLAGS |
 *      " plus space for last_cpupid: |       NODE     | ZONE | LAST_CPUPID ... | FLAGS |
 * classic sparse with space for node:| SECTION | NODE | ZONE |             ... | FLAGS |
 *      " plus space for last_cpupid: | SECTION | NODE | ZONE | LAST_CPUPID ... | FLAGS |
 * classic sparse no space for node:  | SECTION |     ZONE    | ... | FLAGS |
 */

커널 설정에 따라 page->flags에 플래그들 이외에 SECTION, NODE, ZONE 및 LAST_CPUPID 정보 등이 기록된다.

 


Reserved 플래그(예)

PageReserved(), SetPageReserved(), ClearPageReserved(), __ClearPageReserved()

include/linux/page-flags.h

/*
 * Instantiate the accessors for the Reserved flag (bit PG_reserved):
 * PAGEFLAG() and __CLEARPAGEFLAG() are expanded with the name pair
 * (Reserved, reserved).
 */
PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
  • PageReserved(), SetPageReserved(), ClearPageReserved() 및 __ClearPageReserved() static inline 함수가 만들어진다.

 

/*
 * Expand to the TESTPAGEFLAG, SETPAGEFLAG and CLEARPAGEFLAG definitions for
 * the flag named by @uname (function-name suffix) and @lname (PG_ suffix).
 */
#define PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname)               \
        SETPAGEFLAG(uname, lname) CLEARPAGEFLAG(uname, lname)
  • 아래 매크로를 사용하여 PageXXX(), SetPageXXX() 및 ClearPageXXX() static inline 함수가 만들어진다.

 

/*
 * Macros to create function definitions for page flags
 *
 * @uname becomes the suffix of the generated function name and @lname the
 * suffix of the PG_ bit number the function operates on in page->flags.
 */
/* Defines Page<uname>(): returns test_bit() for bit PG_<lname>. */
#define TESTPAGEFLAG(uname, lname)                                      \
static inline int Page##uname(const struct page *page)                  \
                        { return test_bit(PG_##lname, &page->flags); }

/* Defines SetPage<uname>(): calls set_bit() for bit PG_<lname>. */
#define SETPAGEFLAG(uname, lname)                                       \
static inline void SetPage##uname(struct page *page)                    \
                        { set_bit(PG_##lname, &page->flags); }

/* Defines ClearPage<uname>(): calls clear_bit() for bit PG_<lname>. */
#define CLEARPAGEFLAG(uname, lname)                                     \
static inline void ClearPage##uname(struct page *page)                  \
                        { clear_bit(PG_##lname, &page->flags); }

 

/*
 * Defines __ClearPage<uname>(): same as ClearPage<uname>() but implemented
 * with __clear_bit() instead of clear_bit().
 */
#define __CLEARPAGEFLAG(uname, lname)                                   \
static inline void __ClearPage##uname(struct page *page)                \
                        { __clear_bit(PG_##lname, &page->flags); }
  • test_bit()
    • &page->flags의 PG_xxxxx 번호 비트가 set되었는지 여부를 알아온다.
  • set_bit()
    • &page->flags의 PG_xxxxx 번호 비트를 atomic하게 set 한다.
  • clear_bit()
    • &page->flags의 PG_xxxxx 번호 비트를 atomic하게 clear 한다.
  • __clear_bit()
    • &page->flags의 PG_xxxxx 번호 비트를 clear 한다. (non-atomic)

 


일부 플래그의 재편성 (p->page_type)

아래 4개의 PG_buddy, PG_balloon, PG_kmemcg, PG_table 플래그는 p->_mapcount와 같이 사용하는 것으로 바뀌었고, 다시 유니언 선언하여 공유된 p->page_type을 사용한다.

  • 최초 p->flags에서 관리되던 플래그들이 p->_mapcount로 분리되었었다.
  • 이후 새 커널에서는 p->_mapcount 대신 유니온으로 공유된 p->page_type을 사용한다. 단, p->_mapcount의 초기 값이 -1(0xffff_ffff)이므로 비트의 설정과 해제는 반대로 사용한다.
    • 예) Set Buddy
      • old 커널: p->_mapcount = PAGE_BUDDY_MAPCOUNT_VALUE(-128)
      • new 커널: p->page_type &= ~0x80
    • 예) Clear Buddy
      • old 커널: p->_mapcount = -1
      • new 커널: p->page_type |= 0x80
    • 참고: mm: split page_type out from _mapcount

 

include/linux/page-flags.h

/*
 * Each PAGE_TYPE_OPS(Foo, foo) line below expands to the PageFoo(),
 * __SetPageFoo() and __ClearPageFoo() helpers for the PG_foo page type.
 */

/*
 * PageBuddy() indicates that the page is free and in the buddy system
 * (see mm/page_alloc.c).
 */
PAGE_TYPE_OPS(Buddy, buddy)

/*
 * PageBalloon() is true for pages that are on the balloon page list
 * (see mm/balloon_compaction.c).
 */
PAGE_TYPE_OPS(Balloon, balloon)

/*
 * If kmemcg is enabled, the buddy allocator will set PageKmemcg() on
 * pages allocated with __GFP_ACCOUNT. It gets cleared on page free.
 */
PAGE_TYPE_OPS(Kmemcg, kmemcg)

/*
 * Marks pages in use as page tables.
 */
PAGE_TYPE_OPS(Table, table)

 

/*
 * For pages that are never mapped to userspace (and aren't PageSlab),
 * page_type may be used.  Because it is initialised to -1, we invert the
 * sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and
 * __ClearPageFoo *sets* the bit used for PageFoo.  We reserve a few high and
 * low bits so that an underflow or overflow of page_mapcount() won't be
 * mistaken for a page type value.
 */
#define PAGE_TYPE_BASE  0xf0000000
/* Reserve              0x0000007f to catch underflows of page_mapcount */
#define PAGE_MAPCOUNT_RESERVE   (-128)
#define PG_buddy        0x00000080
#define PG_balloon      0x00000100
#define PG_kmemcg       0x00000200
#define PG_table        0x00000400

/*
 * True when all PAGE_TYPE_BASE bits are set in page->page_type and every
 * bit of @flag is clear (a clear bit means "type is set", see above).
 * Both arguments are parenthesised so that expressions such as
 * PageType(p + 1, a | b) expand with the intended precedence.
 */
#define PageType(page, flag)                                            \
        (((page)->page_type & (PAGE_TYPE_BASE | (flag))) == PAGE_TYPE_BASE)

/*
 * Return non-zero when page->page_type, read as a signed int, is below
 * PAGE_MAPCOUNT_RESERVE.
 */
static inline int page_has_type(struct page *page)
{
        return (int)page->page_type < PAGE_MAPCOUNT_RESERVE;
}

/*
 * Generate the three helpers for one page type:
 *   Page<uname>()        - test whether the PG_<lname> type is set;
 *   __SetPage<uname>()   - set the type by clearing the PG_<lname> bit;
 *                          asserts that PageType(page, 0) holds first;
 *   __ClearPage<uname>() - clear the type by setting the PG_<lname> bit;
 *                          asserts that the type is currently set.
 */
#define PAGE_TYPE_OPS(uname, lname)                                     \
static __always_inline int Page##uname(struct page *page)               \
{                                                                       \
        return PageType(page, PG_##lname);                              \
}                                                                       \
static __always_inline void __SetPage##uname(struct page *page)         \
{                                                                       \
        VM_BUG_ON_PAGE(!PageType(page, 0), page);                       \
        page->page_type &= ~PG_##lname;                                 \
}                                                                       \
static __always_inline void __ClearPage##uname(struct page *page)       \
{                                                                       \
        VM_BUG_ON_PAGE(!Page##uname(page), page);                       \
        page->page_type |= PG_##lname;                                  \
}

위의 매크로를 통해 PageBuddy(), __SetPageBuddy(), __ClearPageBuddy() 등의 인라인 함수가 생성된다.

 

 


페이지 블럭 관련

set_pageblock_flags_group()

linux/pageblock-flags.h

/*
 * Forward to set_pfnblock_flags_mask() for the block containing @page,
 * using page_to_pfn(page) as the pfn, @end_bitidx as the last bit of
 * interest and a mask of (end_bitidx - start_bitidx + 1) low bits.
 *
 * Every argument is parenthesised so that expression arguments (for example
 * a start index written as "base + 1") keep their meaning inside the width
 * computation, and the mask is built from 1UL to match the unsigned long
 * mask parameter. Note that @page and @end_bitidx are evaluated twice.
 */
#define set_pageblock_flags_group(page, flags, start_bitidx, end_bitidx) \
        set_pfnblock_flags_mask((page), (flags), page_to_pfn(page),     \
                        (end_bitidx),                                   \
                        (1UL << ((end_bitidx) - (start_bitidx) + 1)) - 1)

 

set_pfnblock_flags_mask()

mm/page_alloc.c

/**
 * set_pfnblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages
 * @page: The page within the block of interest
 * @flags: The flags to set
 * @pfn: The target page frame number
 * @end_bitidx: The last bit of interest
 * @mask: mask of bits that the caller is interested in
 *
 * The bits selected by @mask in the bitmap word for @pfn are replaced by
 * @flags. The update is done with a cmpxchg() retry loop on that word.
 */
void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
                                        unsigned long pfn,
                                        unsigned long end_bitidx,
                                        unsigned long mask)
{
        struct zone *zone;      
        unsigned long *bitmap;
        unsigned long bitidx, word_bitidx;
        unsigned long old_word, word;

        /* The code below relies on each pageblock using exactly 4 bits. */
        BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);

        zone = page_zone(page);
        bitmap = get_pageblock_bitmap(zone, pfn);
        bitidx = pfn_to_bitidx(zone, pfn);
        /* Split the bit index into a word index and a bit offset in that word. */
        word_bitidx = bitidx / BITS_PER_LONG;
        bitidx &= (BITS_PER_LONG-1);

        VM_BUG_ON_PAGE(!zone_spans_pfn(zone, pfn), page);

        /*
         * Bits are counted from the most significant end of the word, so
         * shift @mask and @flags left until their lowest bit sits at
         * position (BITS_PER_LONG - 1 - last bit of interest).
         */
        bitidx += end_bitidx;
        mask <<= (BITS_PER_LONG - bitidx - 1);
        flags <<= (BITS_PER_LONG - bitidx - 1); 

        word = ACCESS_ONCE(bitmap[word_bitidx]);
        /*
         * Retry until the word is swapped unchanged: cmpxchg() returns the
         * value it found; if that differs from what we expected, start over
         * from the value it returned.
         */
        for (;;) {
                old_word = cmpxchg(&bitmap[word_bitidx], word, (word & ~mask) | flags);
                if (word == old_word)
                        break;
                word = old_word;
        }            
}

 

get_pfnblock_flags_mask()

mm/page_alloc.c

/**
 * get_pfnblock_flags_mask - Read a group of pageblock flags
 * @page: The page within the block of interest
 * @pfn: The target page frame number
 * @end_bitidx: The last bit of interest to retrieve
 * @mask: mask of bits that the caller is interested in
 *
 * Locates the bitmap word that holds the flags of the block containing
 * @pfn, shifts it right so that the bit at @end_bitidx lands in bit 0 and
 * masks the result with @mask.
 *
 * Return: pageblock_bits flags
 */
unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
                                        unsigned long end_bitidx,
                                        unsigned long mask)
{
        struct zone *zone = page_zone(page);
        unsigned long *bitmap = get_pageblock_bitmap(zone, pfn);
        unsigned long bitidx = pfn_to_bitidx(zone, pfn);
        unsigned long word = bitmap[bitidx / BITS_PER_LONG];
        /* Offset, inside the word, of the last bit the caller asked for. */
        unsigned long last_bit = (bitidx & (BITS_PER_LONG - 1)) + end_bitidx;

        return (word >> (BITS_PER_LONG - last_bit - 1)) & mask;
}

 

get_pageblock_bitmap()

mm/page_alloc.c

/*
 * Return the pageblock flags bitmap that covers @pfn: with CONFIG_SPARSEMEM
 * it is the pageblock_flags of the mem_section found for @pfn, otherwise
 * the pageblock_flags of @zone.
 */
static inline unsigned long *get_pageblock_bitmap(struct zone *zone,
                                                  unsigned long pfn)
{
#ifndef CONFIG_SPARSEMEM
        return zone->pageblock_flags;
#else
        struct mem_section *ms = __pfn_to_section(pfn);

        return ms->pageblock_flags;
#endif
}

@pfn이 포함된 페이지 블럭 비트맵을 반환한다. (usemap)

  • usemap에는 4비트로 표현된 mobility 플래그들이 저장된다.

 

pfn_to_bitidx()

mm/page_alloc.c

/*
 * Return the index of the first flag bit for the pageblock containing @pfn.
 *
 * The pfn is first made relative to the start of the bitmap's coverage:
 * with CONFIG_SPARSEMEM that is the offset inside the section, otherwise
 * the offset from the zone start pfn rounded down to pageblock_nr_pages.
 * The relative pfn is then converted to a pageblock number and multiplied
 * by NR_PAGEBLOCK_BITS.
 */
static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn)
{
#ifdef CONFIG_SPARSEMEM
        pfn &= (PAGES_PER_SECTION-1);
#else
        pfn -= round_down(zone->zone_start_pfn, pageblock_nr_pages);
#endif
        return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
}

pfn에 대한 pageblock에서 비트 인덱스를 반환한다.

 

SECTION_BLOCKFLAGS_BITS

include/linux/mmzone.h

/*
 * Number of pageblock flag bits per section: the number of pageblocks in a
 * section, 2^(PFN_SECTION_SHIFT - pageblock_order), times NR_PAGEBLOCK_BITS.
 */
#define SECTION_BLOCKFLAGS_BITS \
        ((1UL << (PFN_SECTION_SHIFT - pageblock_order)) * NR_PAGEBLOCK_BITS)

섹션 당 pageblock 비트 수

  • NR_PAGEBLOCK_BITS
    • pageblock에 대해 필요한 비트 수=4
  • PFN_SECTION_SHIFT
    • 섹션 길이 표현에 필요한 비트 수에서 페이지 길이 표현에 필요한 비트 수를 뺀 값
      • arm64: 섹션 길이=27(128M 표현) bits – 12(4KB 표현) bits = 15
  • 예) arm64에서 섹션 크기=128M, pageblock_order=9인 경우
    • SECTION_BLOCKFLAGS_BITS  = 2^(15-9) * 4 bits = 256 bits

 

참고

 

댓글 남기기