ZONE 비트맵 (API)

 

for_each_zone_zonelist()

include/linux/mmzone.h

/**
 * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index
 * @zone - The current zone in the iterator
 * @z - The current pointer within zonelist->_zonerefs being iterated
 * @zlist - The zonelist being iterated
 * @highidx - The zone index of the highest zone to return
 *
 * This iterator iterates through all zones at or below a given zone index.
 * It expands to for_each_zone_zonelist_nodemask() with a NULL nodemask, i.e.
 * no nodemask is supplied to restrict the walk.
 */
#define for_each_zone_zonelist(zone, z, zlist, highidx) \
        for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, NULL)

zonelist에서 highest_zoneidx 이하의 zone 엔트리들에 대해 루프를 돌며 zone과 z(zoneref)를 반환한다.

 

for_each_zone_zonelist_nodemask()

include/linux/mmzone.h

/**
 * for_each_zone_zonelist_nodemask - helper macro to iterate over valid zones in a zonelist at or below a given zone index and within a nodemask
 * @zone - The current zone in the iterator
 * @z - The current pointer within zonelist->_zonerefs being iterated
 * @zlist - The zonelist being iterated
 * @highidx - The zone index of the highest zone to return
 * @nodemask - Nodemask allowed by the allocator
 *
 * This iterator iterates through all zones at or below a given zone index and
 * within a given nodemask.
 *
 * The macro assigns to both @zone and @z, so each must be a modifiable
 * lvalue. The loop stops as soon as @zone becomes NULL.
 *
 * The definition intentionally ends without a trailing line continuation so
 * that the source line following the macro can never be spliced into the
 * loop header.
 */
#define for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \
        for (z = first_zones_zonelist(zlist, highidx, nodemask, &zone); \
                zone;                                                   \
                z = next_zones_zonelist(++z, highidx, nodemask),        \
                        zone = zonelist_zone(z))

zonelist에서 highest_zoneidx 이하의 zone이면서 nodes 비트맵에 설정된 노드들인 zone 엔트리들에 대해 루프를 돌며 zone과 z(zoneref)를 반환한다.

 

/**
 * first_zones_zonelist - Find the first usable zone in a zonelist
 * @zonelist - The zonelist to search for a suitable zone
 * @highest_zoneidx - The zone index of the highest zone to return
 * @nodes - An optional nodemask to filter the zonelist with
 * @zone - Output: receives the zone of the zoneref that was found
 *
 * The search begins at the first entry of @zonelist->_zonerefs and is
 * delegated to next_zones_zonelist(). The zoneref that is returned doubles
 * as an iteration cursor: advance it by one before handing it back to
 * next_zones_zonelist() to continue the walk.
 */
static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
                                        enum zone_type highest_zoneidx,
                                        nodemask_t *nodes,
                                        struct zone **zone)
{
        struct zoneref *cursor;

        cursor = next_zones_zonelist(zonelist->_zonerefs, highest_zoneidx,
                                     nodes);
        *zone = zonelist_zone(cursor);

        return cursor;
}

zonelist에서 highest_zoneidx 이하의 zone이면서 nodes 비트맵에 설정된 노드들에서 가장 처음 발견된 zone을 반환한다.

 

아래 그림은 zonelist의 처음부터 검색하여 ZONE_NORMAL 타입을 초과하지 않는 첫 번째 적합한 zone을 알아오는 과정을 보여준다.

first_zones_zonelist-1b

 

next_zones_zonelist()

mm/mmzone.c

/**
 * next_zones_zonelist - Advance a zonelist cursor to the next acceptable zone
 * @z - The cursor used as a starting point for the search
 * @highest_zoneidx - The zone index of the highest zone to return
 * @nodes - An optional nodemask to filter the zonelist with
 *
 * Starting at @z (inclusive), skips every zoneref whose zone index is above
 * @highest_zoneidx. When @nodes is non-NULL, zonerefs that carry a zone but
 * whose node is not in @nodes are skipped as well; a zoneref with a NULL
 * zone is never tested against the nodemask.
 *
 * The zoneref returned is the entry the search stopped on. Callers must
 * advance it by one before calling next_zones_zonelist() again, otherwise
 * the same entry is returned.
 */
struct zoneref *next_zones_zonelist(struct zoneref *z,
                                        enum zone_type highest_zoneidx,
                                        nodemask_t *nodes)
{
        struct zoneref *cursor = z;

        if (likely(!nodes)) {
                /* No nodemask given: only the zone index limits the search. */
                for (; zonelist_zone_idx(cursor) > highest_zoneidx; cursor++)
                        ;
                return cursor;
        }

        for (;; cursor++) {
                /* Zone type is above the requested ceiling. */
                if (zonelist_zone_idx(cursor) > highest_zoneidx)
                        continue;
                /* A real zone whose node the caller did not allow. */
                if (cursor->zone && !zref_in_nodemask(cursor, nodes))
                        continue;
                return cursor;
        }
}

zonelist에서 커서 z가 가리키는 엔트리부터 검색하여 highest_zoneidx 이하인 zone을 가진 첫 zoneref를 리턴하되, 인수 nodes에 대해 아래와 같이 처리한다.

  • 인수 nodes가 지정된 경우 zonelists의 각 zone은 nodes에 포함된 zone으로 한정한다.
  • 인수 nodes가 지정되지 않은 경우 node에 대해 제한 없다.

 

  • if (likely(nodes == NULL))
    • 높은 확률로 nodes가 null인 경우
  • while (zonelist_zone_idx(z) > highest_zoneidx) z++;
    • zonelist에서 zone 타입이 highest_zoneidx를 초과하는 경우 다음 존을 계속 진행한다.
  • while (zonelist_zone_idx(z) > highest_zoneidx || (z->zone && !zref_in_nodemask(z, nodes))) z++;
    • zonelist에서 zone 타입이 highest_zoneidx를 초과하거나, zone이 존재하면서(z->zone != NULL) 그 노드가 인수 nodes에 포함되어 있지 않은 경우 다음 엔트리로 계속 진행한다.

아래 그림은 z부터 검색하여 ZONE_NORMAL 타입을 초과하지 않는 첫 번째 적합한 zone을 알아오는 과정을 보여준다.

next_zones_zonelist-1b

 

zonelist_zone()

include/linux/mmzone.h

/* Return the zone pointer stored in the given zonelist entry. */
static inline struct zone *zonelist_zone(struct zoneref *zoneref)
{
        struct zone *zone = zoneref->zone;

        return zone;
}

zonelist에서 zoneref의 zone 을 리턴한다.

 

zonelist_zone_idx()

include/linux/mmzone.h

/* Return the zone index recorded in the given zonelist entry. */
static inline int zonelist_zone_idx(struct zoneref *zoneref)
{
        int idx = zoneref->zone_idx;

        return idx;
}

zonelist에서 zoneref의 zone 타입을 리턴한다. (based 0)

 

zref_in_nodemask()

mm/mmzone.c

/*
 * Report whether the node of @zref is set in the nodemask @nodes.
 * Without CONFIG_NUMA no lookup is performed and the answer is always 1.
 */
static inline int zref_in_nodemask(struct zoneref *zref, nodemask_t *nodes)
{
#ifndef CONFIG_NUMA
        return 1;
#else
        return node_isset(zonelist_node_idx(zref), *nodes);
#endif /* !CONFIG_NUMA */
}

zref가 가리키는 zone의 노드가 nodes 노드 비트맵에 포함되어 있는지 여부를 리턴한다. CONFIG_NUMA가 설정되지 않은 경우에는 항상 1을 리턴한다.

 

ZONE과 관련한 GFP

gfp_zone()

include/linux/gfp.h

/*
 * Translate the zone modifier bits of @flags into a zone type.
 *
 * The bits selected by GFP_ZONEMASK are used as an index into
 * GFP_ZONE_TABLE, whose entries are ZONES_SHIFT bits wide. A combination
 * that is marked in GFP_ZONE_BAD trips VM_BUG_ON().
 */
static inline enum zone_type gfp_zone(gfp_t flags)
{
        const int bit = (__force int)(flags & GFP_ZONEMASK);
        const int entry_mask = (1 << ZONES_SHIFT) - 1;
        enum zone_type z;

        /* Shift the selected table entry down to bit 0, then isolate it. */
        z = (GFP_ZONE_TABLE >> (bit * ZONES_SHIFT)) & entry_mask;

        VM_BUG_ON((GFP_ZONE_BAD >> bit) & 1);

        return z;
}

ZONE 비트 정보가 포함된 flags를 사용하여 zone_type (based 0)을 리턴한다.

  •  int bit = (__force int) (flags & GFP_ZONEMASK);
    • ZONE에 대한 __GFP_**** 플래그를 추출한다.
  • z = (GFP_ZONE_TABLE >> (bit * ZONES_SHIFT)) & ((1 << ZONES_SHIFT) - 1);
    • zone type(based 0)을 알아와서 리턴한다.

 

 

/*
 * GFP_ZONE_TABLE is a word size bitstring that is used for looking up the
 * zone to use given the lowest 4 bits of gfp_t. Entries are ZONES_SHIFT bits
 * long and there are 16 of them to cover all possible combinations of
 * __GFP_DMA, __GFP_DMA32, __GFP_MOVABLE and __GFP_HIGHMEM.
 *
 * The zone fallback order is MOVABLE=>HIGHMEM=>NORMAL=>DMA32=>DMA.
 * But GFP_MOVABLE is not only a zone specifier but also an allocation
 * policy. Therefore __GFP_MOVABLE plus another zone selector is valid.
 * Only 1 bit of the lowest 3 bits (DMA,DMA32,HIGHMEM) can be set to "1".
 *
 *       bit       result
 *       =================
 *       0x0    => NORMAL
 *       0x1    => DMA or NORMAL
 *       0x2    => HIGHMEM or NORMAL
 *       0x3    => BAD (DMA+HIGHMEM)
 *       0x4    => DMA32 or DMA or NORMAL
 *       0x5    => BAD (DMA+DMA32)
 *       0x6    => BAD (HIGHMEM+DMA32)
 *       0x7    => BAD (HIGHMEM+DMA32+DMA)
 *       0x8    => NORMAL (MOVABLE+0)
 *       0x9    => DMA or NORMAL (MOVABLE+DMA)
 *       0xa    => MOVABLE (Movable is valid only if HIGHMEM is set too)
 *       0xb    => BAD (MOVABLE+HIGHMEM+DMA)
 *       0xc    => DMA32 (MOVABLE+DMA32)
 *       0xd    => BAD (MOVABLE+DMA32+DMA)
 *       0xe    => BAD (MOVABLE+DMA32+HIGHMEM)
 *       0xf    => BAD (MOVABLE+DMA32+HIGHMEM+DMA)
 *
 * ZONES_SHIFT must be <= 2 on 32 bit platforms.
 */

/* All 16 entries of ZONES_SHIFT bits each must fit within BITS_PER_LONG. */
#if 16 * ZONES_SHIFT > BITS_PER_LONG
#error ZONES_SHIFT too large to create GFP_ZONE_TABLE integer
#endif

/*
 * Each term places a zone value at bit offset (combination * ZONES_SHIFT),
 * where the combination is the OR of the ___GFP_* zone modifier bits.
 * Only the eight permitted combinations are listed here.
 */
#define GFP_ZONE_TABLE ( \
        (ZONE_NORMAL << 0 * ZONES_SHIFT)                                      \
        | (OPT_ZONE_DMA << ___GFP_DMA * ZONES_SHIFT)                          \
        | (OPT_ZONE_HIGHMEM << ___GFP_HIGHMEM * ZONES_SHIFT)                  \
        | (OPT_ZONE_DMA32 << ___GFP_DMA32 * ZONES_SHIFT)                      \
        | (ZONE_NORMAL << ___GFP_MOVABLE * ZONES_SHIFT)                       \
        | (OPT_ZONE_DMA << (___GFP_MOVABLE | ___GFP_DMA) * ZONES_SHIFT)       \
        | (ZONE_MOVABLE << (___GFP_MOVABLE | ___GFP_HIGHMEM) * ZONES_SHIFT)   \
        | (OPT_ZONE_DMA32 << (___GFP_MOVABLE | ___GFP_DMA32) * ZONES_SHIFT)   \
)

 

/*
 * GFP_ZONE_BAD is a bitmap for all combinations of __GFP_DMA, __GFP_DMA32,
 * __GFP_HIGHMEM and __GFP_MOVABLE that are not permitted. One flag per
 * entry starting with bit 0. Bit is set if the combination is not
 * allowed.
 *
 * The bit position is the OR of the ___GFP_* zone modifier bits of the
 * combination; gfp_zone() tests it with (GFP_ZONE_BAD >> bit) & 1.
 */
#define GFP_ZONE_BAD ( \
        1 << (___GFP_DMA | ___GFP_HIGHMEM)                                    \
        | 1 << (___GFP_DMA | ___GFP_DMA32)                                    \
        | 1 << (___GFP_DMA32 | ___GFP_HIGHMEM)                                \
        | 1 << (___GFP_DMA | ___GFP_DMA32 | ___GFP_HIGHMEM)                   \
        | 1 << (___GFP_MOVABLE | ___GFP_HIGHMEM | ___GFP_DMA)                 \
        | 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA)                   \
        | 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_HIGHMEM)               \
        | 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA | ___GFP_HIGHMEM)  \
)

 

ZONES_SHIFT

include/linux/page-flags-layout.h

/*
 * Callers that need memory with specific limitations (for example memory
 * suitable for DMA) pass hints in the zone modifier bits of the gfp_mask.
 * Those bits select a priority ordered list of memory zones matching the
 * request. See gfp_zone() in include/linux/gfp.h
 *
 * ZONES_SHIFT is chosen from MAX_NR_ZONES:
 *   fewer than 2 zones -> 0
 *   exactly 2 zones    -> 1
 *   3 or 4 zones       -> 2
 *   more than 4 zones  -> build error
 */
#if MAX_NR_ZONES > 4
#error ZONES_SHIFT -- too many zones configured adjust calculation
#elif MAX_NR_ZONES > 2
#define ZONES_SHIFT 2
#elif MAX_NR_ZONES == 2
#define ZONES_SHIFT 1
#else
#define ZONES_SHIFT 0
#endif

zone 타입 값을 표현하는 데 필요한 비트 수 (GFP_ZONE_TABLE에서 엔트리 하나가 차지하는 비트 폭)

  • zone이 1개인 경우 0 비트 (zone 비트가 필요 없음)
  • zone이 2개인 경우 1 비트
  • zone이 3~4개인 경우 2비트
  • zone이 5개 이상인 경우 에러

 

gfpflags_to_migratetype()

include/linux/gfp.h

/*
 * Convert GFP flags to their corresponding migrate type.
 *
 * Warns when every bit of GFP_MOVABLE_MASK is set in @gfp_flags. If grouping
 * pages by mobility is disabled, MIGRATE_UNMOVABLE is returned regardless of
 * the flags. Otherwise the result has bit 1 set for __GFP_MOVABLE and bit 0
 * set for __GFP_RECLAIMABLE.
 */
static inline int gfpflags_to_migratetype(const gfp_t gfp_flags)
{
        int movable;
        int reclaimable;

        WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);

        if (unlikely(page_group_by_mobility_disabled))
                return MIGRATE_UNMOVABLE;

        /* Group based on mobility */
        movable = (gfp_flags & __GFP_MOVABLE) != 0;
        reclaimable = (gfp_flags & __GFP_RECLAIMABLE) != 0;

        return (movable << 1) | reclaimable;
}

gfp_flags에서 migrate type을 알아내어 반환한다.

 

참고

 

댓글 남기기