Mem_map (page API)

Mem_map (page API)

 

page_order()

mm/internal.h

/*
 * Report the order recorded for a free page that sits in the buddy system.
 *
 * Locking: callers normally hold page_zone(page)->lock so that the page
 * cannot be allocated concurrently, which would turn the value read here
 * into garbage. A caller without that lock must either guarantee that the
 * page can be neither allocated nor merged in parallel, or cope with invalid
 * values and use page_order_unsafe() instead.
 *
 * Checking PageBuddy() beforehand is the caller's responsibility.
 */
static inline unsigned long page_order(struct page *page)
{
        unsigned long order = page_private(page);

        return order;
}

include/linux/mm.h

/* Access the 'private' member of the given page; the expansion is an lvalue. */
#define page_private(page)              ((page)->private)

페이지에 대한 order 값을 반환한다.

  • private 필드는 버디 시스템에서 관리할 때에는 order 값을 가진다.

 

for_each_migratetype_order()

include/linux/mmzone.h

/*
 * Nested iteration helper: the outer loop walks 'order' from 0 up to (but not
 * including) MAX_ORDER, and for every order the inner loop walks 'type' from
 * 0 up to (but not including) MIGRATE_TYPES. The statement following the
 * macro invocation becomes the body of the inner loop.
 */
#define for_each_migratetype_order(order, type) \
        for (order = 0; order < MAX_ORDER; order++) \
                for (type = 0; type < MIGRATE_TYPES; type++)
  • buddy 메모리 할당자가 사용하는 MAX_ORDER(11)  수 만큼 루프를 돈다.
  • 메모리 hotplug에 대한 이주 플래그 관리를 담당하는 MIGRATE_TYPES 만큼 루프를 돈다.

 


존 및 노드 관련

is_highmem_idx()

include/linux/mmzone.h

/*
 * Tell whether the zone index 'idx' designates high memory.
 *
 * With CONFIG_HIGHMEM this is true for ZONE_HIGHMEM itself, and for
 * ZONE_MOVABLE when zone_movable_is_highmem() says so. Without
 * CONFIG_HIGHMEM the answer is always 0.
 */
static inline int is_highmem_idx(enum zone_type idx)
{
#ifdef CONFIG_HIGHMEM
        if (idx == ZONE_HIGHMEM)
                return 1;

        return idx == ZONE_MOVABLE && zone_movable_is_highmem();
#else
        return 0;
#endif
}

 

zone_movable_is_highmem()

include/linux/mmzone.h

/*
 * Report whether ZONE_MOVABLE is backed by high memory.
 *
 * Only meaningful with CONFIG_HIGHMEM; without it the result is always 0.
 * When CONFIG_HAVE_MEMBLOCK_NODE_MAP is also set, the decision comes from the
 * movable_zone variable; otherwise it is derived from the position of
 * ZONE_MOVABLE relative to ZONE_HIGHMEM in the zone enumeration.
 */
static inline int zone_movable_is_highmem(void)
{
#ifdef CONFIG_HIGHMEM
# ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
        return movable_zone == ZONE_HIGHMEM;
# else
        return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM;
# endif
#else
        return 0;
#endif
}

 

zone_idx()

include/linux/mmzone.h

/*
 * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc.
 *
 * The index is obtained by pointer subtraction: the zone's address minus the
 * start of the node_zones array reached through zone->zone_pgdat. Note that
 * the 'zone' argument is evaluated twice.
 */
#define zone_idx(zone)          ((zone) - (zone)->zone_pgdat->node_zones)
  • zone 인덱스 번호를 리턴한다.
    • 예) ZONE_DMA, ZONE_NORMAL을 사용하는 경우 0과 1이 리턴된다.
    • 예) ZONE_NORMAL만 사용하는 경우 0이 리턴된다.

 

set_page_links()

include/linux/mm.h

/*
 * Record where a page lives: stores the zone and the node through
 * set_page_zone() and set_page_node(). When SECTION_IN_PAGE_FLAGS is defined,
 * the section number derived from 'pfn' is stored as well; otherwise 'pfn'
 * is not used.
 */
static inline void set_page_links(struct page *page, enum zone_type zone,
        unsigned long node, unsigned long pfn) 
{
        set_page_zone(page, zone);
        set_page_node(page, node);
#ifdef SECTION_IN_PAGE_FLAGS
        /* The section id is only kept in page->flags in this configuration. */
        set_page_section(page, pfn_to_section_nr(pfn));
#endif
}
  • page->flags에 zone, node 및 section 정보를 설정한다.
 * No sparsemem or sparsemem vmemmap: |       NODE     | ZONE |             ... | FLAGS |
 *      " plus space for last_cpupid: |       NODE     | ZONE | LAST_CPUPID ... | FLAGS |
 * classic sparse with space for node:| SECTION | NODE | ZONE |             ... | FLAGS |
 *      " plus space for last_cpupid: | SECTION | NODE | ZONE | LAST_CPUPID ... | FLAGS |
 * classic sparse no space for node:  | SECTION |     ZONE    | ... | FLAGS |

 

set_page_zone()

include/linux/mm.h

/*
 * Store 'zone' in the zone field of page->flags: the field is cleared and
 * the new value (limited to ZONES_MASK) is merged in at ZONES_PGSHIFT.
 */
static inline void set_page_zone(struct page *page, enum zone_type zone)
{
        page->flags = (page->flags & ~(ZONES_MASK << ZONES_PGSHIFT)) |
                      ((zone & ZONES_MASK) << ZONES_PGSHIFT);
}
  • page->flags에 zone 정보를 설정한다.

 

set_page_node()

include/linux/mm.h

/*
 * Store 'node' in the node field of page->flags: the field is cleared and
 * the new value (limited to NODES_MASK) is merged in at NODES_PGSHIFT.
 */
static inline void set_page_node(struct page *page, unsigned long node)
{
        page->flags = (page->flags & ~(NODES_MASK << NODES_PGSHIFT)) |
                      ((node & NODES_MASK) << NODES_PGSHIFT);
}
  • page->flags에 노드 정보를 설정한다.

 

page_zone_id()

include/linux/mm.h

/*
 * Extract the zone identification bits from page->flags.
 *
 * The buddy allocator mainly uses this to decide whether two pages could be
 * buddies. The value does not necessarily identify the zone: when no node id
 * is available in the page flags, the section number id may be used instead.
 * The only guarantee is that two combinable pages of one zone yield the same
 * value.
 */
static inline int page_zone_id(struct page *page)
{
        return ZONEID_MASK & (page->flags >> ZONEID_PGSHIFT);
}

페이지에서 zone id를 추출하여 반환한다.

 


Sparsemem 섹션 관련

set_page_section()

include/linux/mm.h

#ifdef SECTION_IN_PAGE_FLAGS
/*
 * Store 'section' in the section field of page->flags: the field is cleared
 * and the new value (limited to SECTIONS_MASK) is merged in at
 * SECTIONS_PGSHIFT. Only available when SECTION_IN_PAGE_FLAGS is defined.
 */
static inline void set_page_section(struct page *page, unsigned long section)
{
        page->flags = (page->flags & ~(SECTIONS_MASK << SECTIONS_PGSHIFT)) |
                      ((section & SECTIONS_MASK) << SECTIONS_PGSHIFT);
}
#endif
  • page->flags에 섹션 정보를 설정한다.

 

__pfn_to_section()

include/linux/mmzone.h

/*
 * Find the mem_section descriptor covering 'pfn': the pfn is first turned
 * into a section number, which is then resolved by __nr_to_section().
 */
static inline struct mem_section *__pfn_to_section(unsigned long pfn)
{
        unsigned long section_nr = pfn_to_section_nr(pfn);

        return __nr_to_section(section_nr);
}

pfn 값에 대응하는 mem_section 구조체 정보를 리턴한다.

  •  pfn_to_section_nr()
    • pfn 값으로 섹션 번호를 알아온다.
  • __nr_to_section()
    • 섹션 번호로 mem_section 구조체 정보를 리턴한다.

 

pfn_to_section_nr()

include/linux/mmzone.h

/* Convert a page frame number to its section number by dropping the low PFN_SECTION_SHIFT bits. */
#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT)
  • pfn의 섹션(Sparse) 인덱스를 리턴한다.
    • 예) Realview-PBX
      • 섹션 사이즈가 256M 단위(PFN_SECTION_SHIFT=16)이므로 섹션 번호는 0~15까지의 결과

 

__nr_to_section()

include/linux/mmzone.h

/*
 * Resolve section number 'nr' to its mem_section descriptor.
 *
 * The root entry is selected with SECTION_NR_TO_ROOT(); a missing root
 * yields NULL. Otherwise the entry inside the root is picked with the low
 * bits of 'nr' (SECTION_ROOT_MASK).
 */
static inline struct mem_section *__nr_to_section(unsigned long nr)
{
        struct mem_section *root = mem_section[SECTION_NR_TO_ROOT(nr)];

        if (!root)
                return NULL;

        return &root[nr & SECTION_ROOT_MASK];
}
  • 섹션 번호로 mem_section 구조체 정보를 리턴한다.

 

SECTION_NR_TO_ROOT()

include/linux/mmzone.h

/* Map a section number to the index of its root: integer division by SECTIONS_PER_ROOT. */
#define SECTION_NR_TO_ROOT(sec) ((sec) / SECTIONS_PER_ROOT)
  • 섹션 번호로 ROOT 번호를 리턴한다.

 

/*
 * Number of mem_section entries grouped under one root. With
 * CONFIG_SPARSEMEM_EXTREME this is as many struct mem_section as fit into
 * PAGE_SIZE bytes; otherwise every root holds exactly one section.
 */
#ifdef CONFIG_SPARSEMEM_EXTREME
#define SECTIONS_PER_ROOT       (PAGE_SIZE / sizeof (struct mem_section))
#else
#define SECTIONS_PER_ROOT       1
#endif
  • ROOT 하나 당 섹션 수
    • PAGE_SIZE(4K)에 mem_section 구조체가 들어갈 수 있는 수

 

present_section_nr()

include/linux/mmzone.h

/*
 * Check whether the section with number 'nr' is present: the number is
 * resolved to its mem_section descriptor, which present_section() then
 * inspects.
 */
static inline int present_section_nr(unsigned long nr)
{
        struct mem_section *ms = __nr_to_section(nr);

        return present_section(ms);
}

섹션 번호에 해당하는 mem_section이 준비되어 있는지 확인한다. 준비되어 있지 않은 경우 해당 섹션은 hole을 의미한다.

  • __nr_to_section()
    • 섹션 번호로 mem_section 구조체 정보를 알아온다.
  • present_section()
    • mem_section 구조체 정보에 섹션이 존재하는지 확인한다.

 

present_section()

include/linux/mmzone.h

/*
 * Return 1 when 'section' is non-NULL and its section_mem_map has the
 * SECTION_MARKED_PRESENT bit set, 0 otherwise.
 */
static inline int present_section(struct mem_section *section)
{
        if (!section)
                return 0;

        return (section->section_mem_map & SECTION_MARKED_PRESENT) != 0;
}

mem_section 구조체 정보에 섹션이 존재하는지 확인한다.

  • SECTION_MARKED_PRESENT 식별 비트가 설정되어 있는지 확인한다.

 

__section_mem_map_addr()

include/linux/mmzone.h

/*
 * Decode the mem_map pointer kept in section->section_mem_map: the bits
 * outside SECTION_MAP_MASK are stripped and the remainder is returned as a
 * struct page pointer.
 */
static inline struct page *__section_mem_map_addr(struct mem_section *section)
{
        return (struct page *)(section->section_mem_map & SECTION_MAP_MASK);
}

해당 Sparse memory 섹션에 대한 mem_map 주소를 반환한다.

include/linux/mmzone.h

/*
 * We use the lower bits of the mem_map pointer to store
 * a little bit of information.  There should be at least
 * 3 bits here due to 32-bit alignment.
 */
#define SECTION_MARKED_PRESENT  (1UL<<0)        /* bit 0: section is marked present */
#define SECTION_HAS_MEM_MAP     (1UL<<1)        /* bit 1 */
#define SECTION_MAP_LAST_BIT    (1UL<<2)        /* first bit above the flag bits */
/* All bits below SECTION_MAP_LAST_BIT (bits 0-1) are cleared by this mask. */
#define SECTION_MAP_MASK        (~(SECTION_MAP_LAST_BIT-1))
/* NOTE(review): presumably the shift for a node id stored in this word - usage not visible here, confirm. */
#define SECTION_NID_SHIFT       2

 


페이지의 참조 사용 및 사용 해제

get_page()

include/linux/mm.h

/*
 * Take an additional reference on a page. The reference is always taken on
 * the page returned by compound_head(), so the count is incremented on that
 * page rather than on the one passed in when they differ.
 */
static inline void get_page(struct page *page)
{
        page = compound_head(page);
        /*
         * Getting a normal page or the head of a compound page
         * requires to already have an elevated page->_refcount.
         */
        VM_BUG_ON_PAGE(page_ref_count(page) <= 0, page);
        page_ref_inc(page);
}

참조 카운터를 1 증가시킨다.

 

get_page_unless_zero()

include/linux/mm.h

/*
 * Attempt to take one reference on 'page', but only if its refcount is not
 * already zero; in that case nothing is taken and false is returned.
 *
 * May run while the MMU is off, so it must not touch any virtual mapping.
 */
static inline int get_page_unless_zero(struct page *page)
{
        int grabbed = page_ref_add_unless(page, 1, 0);

        return grabbed;
}

참조 카운터(p->_refcount)를 읽은 후 0 값과 다른 경우에 한해 증가시킨다. 결과 값이 0이 아니면 true를 반환한다.

 

put_page()

include/linux/mm.h

/*
 * Drop one reference on a page. The operation is applied to the page
 * returned by compound_head(). When the last reference goes away the page is
 * handed to __put_page().
 */
static inline void put_page(struct page *page)
{
        struct page *head = compound_head(page);

        /*
         * Devmap managed pages are special: the 2 -> 1 refcount transition
         * has to be caught, because a refcount of one means the page is free
         * and the device driver must be told through its callback. See
         * include/linux/memremap.h and HMM for details.
         */
        if (put_devmap_managed_page(head))
                return;

        if (!put_page_testzero(head))
                return;

        __put_page(head);
}

참조 카운터를 1 감소 시킨다. 만일 0이되면 페이지의 회수를 진행한다.

 

put_page_testzero()

include/linux/mm.h

/*
 * Methods to modify the page usage count.
 *
 * What counts for a page usage:
 * - cache mapping   (page->mapping)
 * - private data    (page->private)
 * - page mapped in a task's page tables, each mapping
 *   is counted separately
 *
 * Also, many kernel routines increase the page count before a critical
 * routine so they can be sure the page doesn't go away from under them.
 */

/*
 * Drop a ref, return true if the refcount fell to zero (the page has no users)
 *
 * Dropping a reference on a page whose count is already zero is a bug and is
 * caught by the VM_BUG_ON_PAGE() check before the decrement.
 */
static inline int put_page_testzero(struct page *page)
{
        VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
        return page_ref_dec_and_test(page);
}

페이지의 참조카운터를 감소시키고 0(사용완료)인지 확인하여 사용완료 여부를 반환한다.

  • 0=사용중, 1=사용완료(참조 카운터 _refcount가 0이 된 경우)

 


page vs pfn 변환

include/asm-generic/memory_model.h

/* page_to_pfn is an alias for __page_to_pfn. */
#define page_to_pfn __page_to_pfn

다음 4가지 커널 옵션 설정에 따라 함수가 선택된다.

  • CONFIG_FLATMEM
/*
 * CONFIG_FLATMEM: a single mem_map array covers all pages. Index 0 of the
 * array corresponds to pfn ARCH_PFN_OFFSET, so the conversion in either
 * direction is plain pointer arithmetic adjusted by that offset.
 */
#define __pfn_to_page(pfn)      (mem_map + ((pfn) - ARCH_PFN_OFFSET))
#define __page_to_pfn(page)     ((unsigned long)((page) - mem_map) + \
                                 ARCH_PFN_OFFSET)
  • CONFIG_DISCONTIGMEM
/*
 * CONFIG_DISCONTIGMEM: pfn -> struct page.
 *
 * The owning node is looked up with arch_pfn_to_nid(); the page is then
 * found in that node's node_mem_map at the node-local offset returned by
 * arch_local_page_offset(). The 'pfn' argument is evaluated exactly once.
 *
 * Every continuation backslash must be the very last character of its line:
 * whitespace after it is only accepted as a compiler extension (with a
 * warning) and is not a line continuation in standard C.
 */
#define __pfn_to_page(pfn)                      \
({      unsigned long __pfn = (pfn);            \
        unsigned long __nid = arch_pfn_to_nid(__pfn);  \
        NODE_DATA(__nid)->node_mem_map + arch_local_page_offset(__pfn, __nid);\
})

/*
 * CONFIG_DISCONTIGMEM: struct page -> pfn.
 *
 * The node is taken from the page itself (page_to_nid()); the pfn is the
 * page's index inside that node's node_mem_map plus the node's
 * node_start_pfn. The 'pg' argument is evaluated exactly once.
 */
#define __page_to_pfn(pg)                                               \
({      const struct page *__pg = (pg);                                 \
        struct pglist_data *__pgdat = NODE_DATA(page_to_nid(__pg));     \
        (unsigned long)(__pg - __pgdat->node_mem_map) +                 \
         __pgdat->node_start_pfn;                                       \
})
  • CONFIG_SPARSEMEM
/*
 * Note: section's mem_map is encoded to reflect its start_pfn.
 * section[i].section_mem_map == mem_map's address - start_pfn;
 *
 * Because of that encoding, subtracting the decoded section mem_map from a
 * page pointer yields the pfn directly, and adding a pfn to it yields the
 * page pointer.
 */
/* struct page -> pfn: the section number comes from page_to_section(). */
#define __page_to_pfn(pg)                                       \
({      const struct page *__pg = (pg);                         \
        int __sec = page_to_section(__pg);                      \
        (unsigned long)(__pg - __section_mem_map_addr(__nr_to_section(__sec))); \
})      

/* pfn -> struct page: the section is looked up from the pfn itself. */
#define __pfn_to_page(pfn)                              \
({      unsigned long __pfn = (pfn);                    \
        struct mem_section *__sec = __pfn_to_section(__pfn);    \
        __section_mem_map_addr(__sec) + __pfn;          \
})
  • CONFIG_SPARSEMEM_VMEMMAP
/*
 * memmap is virtually contiguous.
 *
 * The pfn is used directly as an index from the 'vmemmap' base pointer, so
 * both conversions are simple pointer arithmetic.
 */
#define __pfn_to_page(pfn)      (vmemmap + (pfn))
#define __page_to_pfn(page)     (unsigned long)((page) - vmemmap)

 


페이지 플래그

include/linux/page-flags.h

/*
 * Various page->flags bits:
 *
 * PG_reserved is set for special pages, which can never be swapped out. Some
 * of them might not even exist (eg empty_bad_page)...
 *
 * The PG_private bitflag is set on pagecache pages if they contain filesystem
 * specific data (which is normally at page->private). It can be used by
 * private allocations for its own usage.
 *
 * During initiation of disk I/O, PG_locked is set. This bit is set before I/O
 * and cleared when writeback _starts_ or when read _completes_. PG_writeback
 * is set before writeback starts and cleared when it finishes.
 *
 * PG_locked also pins a page in pagecache, and blocks truncation of the file
 * while it is held.
 *
 * page_waitqueue(page) is a wait queue of all tasks waiting for the page
 * to become unlocked.
 *
 * PG_uptodate tells whether the page's contents is valid.  When a read
 * completes, the page becomes uptodate, unless a disk I/O error happened.
 *
 * PG_referenced, PG_reclaim are used for page reclaim for anonymous and
 * file-backed pagecache (see mm/vmscan.c).
 *
 * PG_error is set to indicate that an I/O error occurred on this page.
 *
 * PG_arch_1 is an architecture specific page state bit.  The generic code
 * guarantees that this bit is cleared for a page when it first is entered into
 * the page cache.
 *
 * PG_highmem pages are not permanently mapped into the kernel virtual address
 * space, they need to be kmapped separately for doing IO on the pages.  The
 * struct page (these bits with information) are always mapped into kernel
 * address space...
 *
 * PG_hwpoison indicates that a page got corrupted in hardware and contains
 * data with incorrect ECC bits that triggered a machine check. Accessing is
 * not safe since it may cause another machine check. Don't touch!
 */

/*
 * Don't use the *_dontuse flags.  Use the macros.  Otherwise you'll break
 * locked- and dirty-page accounting.
 *
 * The page flags field is split into two parts, the main flags area
 * which extends from the low bits upwards, and the fields area which
 * extends from the high bits downwards.
 *
 *  | FIELD | ... | FLAGS |
 *  N-1           ^       0
 *               (NR_PAGEFLAGS)
 *
 * The fields area is reserved for fields mapping zone, node (for NUMA) and
 * SPARSEMEM section (for variants of SPARSEMEM that require section ids like
 * SPARSEMEM_EXTREME with !SPARSEMEM_VMEMMAP).
 */

 

/*
 * Bit numbers of the individual flags kept in page->flags. Enumerators after
 * __NR_PAGEFLAGS are aliases that reuse an existing bit for a specific user.
 */
enum pageflags {
        PG_locked,              /* Page is locked. Don't touch. */
        PG_error,               /* An I/O error occurred on this page */
        PG_referenced,          /* Used for page reclaim */
        PG_uptodate,            /* Page contents are valid */
        PG_dirty,
        PG_lru,
        PG_active,
        PG_slab,
        PG_owner_priv_1,        /* Owner use. If pagecache, fs may use*/
        PG_arch_1,              /* Architecture specific page state bit */
        PG_reserved,            /* Special page, can never be swapped out */
        PG_private,             /* If pagecache, has fs-private data */
        PG_private_2,           /* If pagecache, has fs aux data */
        PG_writeback,           /* Page is under writeback */
#ifdef CONFIG_PAGEFLAGS_EXTENDED
        PG_head,                /* A head page */
        PG_tail,                /* A tail page */
#else
        PG_compound,            /* A compound page */
#endif
        PG_swapcache,           /* Swap page: swp_entry_t in private */
        PG_mappedtodisk,        /* Has blocks allocated on-disk */
        PG_reclaim,             /* To be reclaimed asap */
        PG_swapbacked,          /* Page is backed by RAM/swap */
        PG_unevictable,         /* Page is "unevictable"  */
#ifdef CONFIG_MMU
        PG_mlocked,             /* Page is vma mlocked */
#endif
#ifdef CONFIG_ARCH_USES_PG_UNCACHED
        PG_uncached,            /* Page has been mapped as uncached */
#endif
#ifdef CONFIG_MEMORY_FAILURE
        PG_hwpoison,            /* hardware poisoned page. Don't touch */
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
        PG_compound_lock,
#endif
        __NR_PAGEFLAGS,         /* Count of the flag bits enumerated above */

        /* Filesystems */
        PG_checked = PG_owner_priv_1,

        /* Two page bits are conscripted by FS-Cache to maintain local caching
         * state.  These bits are set on pages belonging to the netfs's inodes
         * when those inodes are being locally cached.
         */
        PG_fscache = PG_private_2,      /* page backed by cache */

        /* XEN */
        /* Pinned in Xen as a read-only pagetable page. */
        PG_pinned = PG_owner_priv_1,
        /* Pinned as part of domain save (see xen_mm_pin_all()). */
        PG_savepinned = PG_dirty,
        /* Has a grant mapping of another (foreign) domain's page. */
        PG_foreign = PG_owner_priv_1,

        /* SLOB */
        PG_slob_free = PG_private,
};

 

Reserved 플래그(예)

PageReserved(), SetPageReserved(), ClearPageReserved(), __ClearPageReserved()

include/linux/page-flags.h

/*
 * Generates PageReserved(), SetPageReserved() and ClearPageReserved(), plus
 * __ClearPageReserved() which clears the bit with __clear_bit().
 */
PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
  • PageReserved(), SetPageReserved(), ClearPageReserved() 및 __ClearPageReserved() static inline 함수가 만들어진다.

 

/*
 * Emit the test, set and clear accessors for one page flag in a single
 * invocation: 'uname' forms the function names, 'lname' the PG_ constant.
 */
#define PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname)               \
        SETPAGEFLAG(uname, lname) CLEARPAGEFLAG(uname, lname)
  • 아래 매크로를 사용하여 PageXXX(), SetPageXXX() 및 ClearPageXXX() static inline 함수가 만들어진다.

 

/*
 * Macros to create function definitions for page flags
 *
 * In each macro 'uname' is pasted into the function name and 'lname' into
 * the PG_<lname> bit number that the function operates on in page->flags.
 */
/* Page<uname>(): report whether the bit is set, using test_bit(). */
#define TESTPAGEFLAG(uname, lname)                                      \
static inline int Page##uname(const struct page *page)                  \
                        { return test_bit(PG_##lname, &page->flags); }

/* SetPage<uname>(): set the bit, using set_bit(). */
#define SETPAGEFLAG(uname, lname)                                       \
static inline void SetPage##uname(struct page *page)                    \
                        { set_bit(PG_##lname, &page->flags); }

/* ClearPage<uname>(): clear the bit, using clear_bit(). */
#define CLEARPAGEFLAG(uname, lname)                                     \
static inline void ClearPage##uname(struct page *page)                  \
                        { clear_bit(PG_##lname, &page->flags); }

 

/* __ClearPage<uname>(): clear the PG_<lname> bit in page->flags, using __clear_bit(). */
#define __CLEARPAGEFLAG(uname, lname)                                   \
static inline void __ClearPage##uname(struct page *page)                \
                        { __clear_bit(PG_##lname, &page->flags); }
  • test_bit()
    • &page->flags의 PG_xxxxx 번호 비트가 set되었는지 여부를 알아온다.
  • set_bit()
    • &page->flags의 PG_xxxxx 번호 비트를 atomic하게 set 한다.
  • clear_bit()
    • &page->flags의 PG_xxxxx 번호 비트를 atomic하게 clear 한다.
  • __clear_bit()
    • &page->flags의 PG_xxxxx 번호 비트를 clear 한다. (non-atomic)

 


일부 플래그의 재편성

아래 4개의 PG_buddy, PG_balloon, PG_kmemcg, PG_table 플래그는 p->_mapcount와 같이 사용하는 것으로 바뀌었고, 다시 유니언으로 선언하여 공유된 p->page_type을 사용한다.

  • 최초 p->flags에서 관리되던 플래그들이 p->_mapcount로 분리되었었다.
  • 이후 새 커널에서는 p->_mapcount 대신 유니온으로 공유된 p->page_type을 사용한다. 단, p->_mapcount의 초기 값이 -1(0xffff_ffff)이므로 비트의 설정과 해제는 반대로 사용한다.
    • 예) Set Buddy
      • old 커널: p->_mapcount = PAGE_BUDDY_MAPCOUNT_VALUE(-128)
      • new 커널: p->page_type &= ~0x80
    • 예) Clear Buddy
      • old 커널: p->_mapcount = -1
      • new 커널: p->page_type |= 0x80
    • 참고: mm: split page_type out from _mapcount

 

include/linux/page-flags.h

/*
 * PageBuddy() indicates that the page is free and in the buddy system
 * (see mm/page_alloc.c).
 * Generates PageBuddy(), __SetPageBuddy() and __ClearPageBuddy().
 */
PAGE_TYPE_OPS(Buddy, buddy)

/*
 * PageBalloon() is true for pages that are on the balloon page list
 * (see mm/balloon_compaction.c).
 * Generates PageBalloon(), __SetPageBalloon() and __ClearPageBalloon().
 */
PAGE_TYPE_OPS(Balloon, balloon)

/*
 * If kmemcg is enabled, the buddy allocator will set PageKmemcg() on
 * pages allocated with __GFP_ACCOUNT. It gets cleared on page free.
 * Generates PageKmemcg(), __SetPageKmemcg() and __ClearPageKmemcg().
 */
PAGE_TYPE_OPS(Kmemcg, kmemcg)

/*
 * Marks pages in use as page tables.
 * Generates PageTable(), __SetPageTable() and __ClearPageTable().
 */
PAGE_TYPE_OPS(Table, table)

 

/*
 * For pages that are never mapped to userspace (and aren't PageSlab),
 * page_type may be used.  Because it is initialised to -1, we invert the
 * sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and
 * __ClearPageFoo *sets* the bit used for PageFoo.  We reserve a few high and
 * low bits so that an underflow or overflow of page_mapcount() won't be
 * mistaken for a page type value.
 */
#define PAGE_TYPE_BASE  0xf0000000
/* Reserve              0x0000007f to catch underflows of page_mapcount */
/* Parenthesized so the negative constant is safe in any expression context. */
#define PAGE_MAPCOUNT_RESERVE   (-128)
#define PG_buddy        0x00000080
#define PG_balloon      0x00000100
#define PG_kmemcg       0x00000200
#define PG_table        0x00000400

/*
 * True when all PAGE_TYPE_BASE bits are set in page->page_type and every bit
 * of 'flag' is clear (a clear bit means "type set", see above). With a flag
 * of 0 this only checks that the PAGE_TYPE_BASE bits are set.
 *
 * Both arguments are parenthesized so that expressions such as 'p + 1' or
 * 'cond ? A : B' can be passed safely.
 */
#define PageType(page, flag)                                            \
        (((page)->page_type & (PAGE_TYPE_BASE | (flag))) == PAGE_TYPE_BASE)

/*
 * Report whether page->page_type, reinterpreted as a signed int, lies below
 * PAGE_MAPCOUNT_RESERVE.
 */
static inline int page_has_type(struct page *page)
{
        int signed_type = (int)page->page_type;

        return signed_type < PAGE_MAPCOUNT_RESERVE;
}

/*
 * Generate three accessors for the page type PG_<lname>:
 *   Page<uname>()        - test the type through PageType().
 *   __SetPage<uname>()   - clear the PG_<lname> bit in page->page_type;
 *                          asserts PageType(page, 0) beforehand.
 *   __ClearPage<uname>() - set the PG_<lname> bit again; asserts that the
 *                          type is currently reported by Page<uname>().
 */
#define PAGE_TYPE_OPS(uname, lname)                                     \
static __always_inline int Page##uname(struct page *page)               \
{                                                                       \
        return PageType(page, PG_##lname);                              \
}                                                                       \
static __always_inline void __SetPage##uname(struct page *page)         \
{                                                                       \
        VM_BUG_ON_PAGE(!PageType(page, 0), page);                       \
        page->page_type &= ~PG_##lname;                                 \
}                                                                       \
static __always_inline void __ClearPage##uname(struct page *page)       \
{                                                                       \
        VM_BUG_ON_PAGE(!Page##uname(page), page);                       \
        page->page_type |= PG_##lname;                                  \
}

위의 매크로를 통해 PageBuddy(), __SetPageBuddy(), __ClearPageBuddy() 등의 인라인 함수가 생성된다.

 

 


페이지 블럭 관련

set_pageblock_flags_group()

include/linux/pageblock-flags.h

/*
 * Set the pageblock flag bits in the inclusive bit range
 * [start_bitidx, end_bitidx] for the block that contains 'page'.
 *
 * Forwards to set_pfnblock_flags_mask() with the page's pfn, 'end_bitidx'
 * as the last bit of interest, and a mask that has one bit for every
 * position in the range (end_bitidx - start_bitidx + 1 low bits set).
 *
 * Every argument is parenthesized so that expressions can be passed, and the
 * mask is built from 1UL so that the shift is done in unsigned long, the
 * type of the mask parameter. Note that 'page' is evaluated twice.
 */
#define set_pageblock_flags_group(page, flags, start_bitidx, end_bitidx) \
        set_pfnblock_flags_mask((page), (flags), page_to_pfn(page),     \
                        (end_bitidx),                                   \
                        (1UL << ((end_bitidx) - (start_bitidx) + 1)) - 1)

 

set_pfnblock_flags_mask()

mm/page_alloc.c

/**
 * set_pfnblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages
 * @page: The page within the block of interest
 * @flags: The flags to set
 * @pfn: The target page frame number
 * @end_bitidx: The last bit of interest
 * @mask: mask of bits that the caller is interested in
 *
 * The bits selected by @mask are replaced with @flags inside the bitmap word
 * that holds the block's flags. The word is updated with a cmpxchg() retry
 * loop rather than a plain store.
 */
void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
                                        unsigned long pfn,
                                        unsigned long end_bitidx,
                                        unsigned long mask)
{
        struct zone *zone;      
        unsigned long *bitmap;
        unsigned long bitidx, word_bitidx;
        unsigned long old_word, word;

        /* Fail the build if the per-pageblock bit count is not 4. */
        BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);

        zone = page_zone(page);
        bitmap = get_pageblock_bitmap(zone, pfn);
        bitidx = pfn_to_bitidx(zone, pfn);
        /* Split the bit index into a word index and a bit offset in that word. */
        word_bitidx = bitidx / BITS_PER_LONG;
        bitidx &= (BITS_PER_LONG-1);

        VM_BUG_ON_PAGE(!zone_spans_pfn(zone, pfn), page);

        /*
         * Bit positions are counted from the most significant end of the
         * word: the last bit of interest sits at BITS_PER_LONG - bitidx - 1.
         */
        bitidx += end_bitidx;
        mask <<= (BITS_PER_LONG - bitidx - 1);
        flags <<= (BITS_PER_LONG - bitidx - 1); 

        word = ACCESS_ONCE(bitmap[word_bitidx]);
        for (;;) {
                /*
                 * Try to install the new word; if the value returned differs
                 * from what we expected, retry based on the returned value.
                 */
                old_word = cmpxchg(&bitmap[word_bitidx], word, (word & ~mask) | flags);
                if (word == old_word)
                        break;
                word = old_word;
        }            
}

 

get_pfnblock_flags_mask()

mm/page_alloc.c

/**
 * get_pfnblock_flags_mask - Read a group of flags of a pageblock_nr_pages block of pages
 * @page: The page within the block of interest
 * @pfn: The target page frame number
 * @end_bitidx: The last bit of interest to retrieve
 * @mask: mask of bits that the caller is interested in
 *
 * The bitmap word holding the block's flags is fetched, shifted so that the
 * bit at @end_bitidx lands in the least significant position, and masked.
 *
 * Return: pageblock_bits flags
 */
unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
                                        unsigned long end_bitidx,
                                        unsigned long mask)
{
        struct zone *zone = page_zone(page);
        unsigned long *usemap = get_pageblock_bitmap(zone, pfn);
        unsigned long idx = pfn_to_bitidx(zone, pfn);
        unsigned long word = usemap[idx / BITS_PER_LONG];
        unsigned long last_bit = (idx & (BITS_PER_LONG-1)) + end_bitidx;

        /* Bit positions are counted from the most significant end. */
        return (word >> (BITS_PER_LONG - last_bit - 1)) & mask;
}

 

get_pageblock_bitmap()

mm/page_alloc.c

/*
 * Locate the bitmap that stores the pageblock flag bits for 'pfn'.
 * With CONFIG_SPARSEMEM the bitmap belongs to the mem_section covering the
 * pfn; otherwise it belongs to the zone.
 */
static inline unsigned long *get_pageblock_bitmap(struct zone *zone,
                                                        unsigned long pfn)
{
        unsigned long *bitmap;

#ifdef CONFIG_SPARSEMEM
        bitmap = __pfn_to_section(pfn)->pageblock_flags;
#else
        bitmap = zone->pageblock_flags;
#endif /* CONFIG_SPARSEMEM */

        return bitmap;
}

@pfn이 포함된 페이지 블럭 비트맵을 반환한다. (usemap)

  • usemap에는 4비트로 표현된 mobility 플래그들이 저장된다.

 

pfn_to_bitidx()

mm/page_alloc.c

/*
 * Compute the index of the first flag bit of the pageblock containing 'pfn'
 * within its bitmap.
 *
 * The pfn is first made relative to the start of the area the bitmap covers:
 * the enclosing section with CONFIG_SPARSEMEM, otherwise the zone start
 * rounded down to a pageblock boundary. The relative pageblock number is
 * then scaled by NR_PAGEBLOCK_BITS.
 */
static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn)
{
#ifdef CONFIG_SPARSEMEM
        pfn &= (PAGES_PER_SECTION-1);
#else
        pfn = pfn - round_down(zone->zone_start_pfn, pageblock_nr_pages);
#endif /* CONFIG_SPARSEMEM */

        return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
}

pfn에 대한 pageblock에서 비트 인덱스를 반환한다.

 

SECTION_BLOCKFLAGS_BITS

include/linux/mmzone.h

/*
 * Number of pageblock flag bits per section: 2^(PFN_SECTION_SHIFT -
 * pageblock_order) blocks, each taking NR_PAGEBLOCK_BITS bits.
 */
#define SECTION_BLOCKFLAGS_BITS \
        ((1UL << (PFN_SECTION_SHIFT - pageblock_order)) * NR_PAGEBLOCK_BITS)

섹션 당 pageblock 비트 수

  • NR_PAGEBLOCK_BITS
    • pageblock에 대해 필요한 비트 수=4
  • PFN_SECTION_SHIFT
    • 섹션 길이 표현에 필요한 비트 수에서 페이지 길이 표현에 필요한 비트 수를 뺀 값
      • arm64: 섹션 길이=30(1GB 표현) bits – 12(4KB 표현) bits = 18
  • 예) arm64에서 섹션 크기=1G, pageblock_order=10인 경우
    • 2^(18-10) * 4 = 1024개

 

참고

댓글 남기기

이메일은 공개되지 않습니다. 필수 입력창은 * 로 표시되어 있습니다