for_each_zone_zonelist()
include/linux/mmzone.h
/** * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index * @zone - The current zone in the iterator * @z - The current pointer within zonelist->zones being iterated * @zlist - The zonelist being iterated * @highidx - The zone index of the highest zone to return * * This iterator iterates though all zones at or below a given zone index. */ #define for_each_zone_zonelist(zone, z, zlist, highidx) \ for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, NULL)
zonelist에서 highest_zoneidx 이하의 zone 엔트리들에 대해 루프를 돌며 zone과 z(zoneref)를 반환한다.
for_each_zone_zonelist_nodemask()
include/linux/mmzone.h
/** * for_each_zone_zonelist_nodemask - helper macro to iterate over valid zones in a zonelist at or below a given zone index and within a nodemask * @zone - The current zone in the iterator * @z - The current pointer within zonelist->zones being iterated * @zlist - The zonelist being iterated * @highidx - The zone index of the highest zone to return * @nodemask - Nodemask allowed by the allocator * * This iterator iterates though all zones at or below a given zone index and * within a given nodemask */ #define for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \ for (z = first_zones_zonelist(zlist, highidx, nodemask, &zone); \ zone; \ z = next_zones_zonelist(++z, highidx, nodemask), \ zone = zonelist_zone(z)) \
zonelist에서 highest_zoneidx 이하의 zone이면서 nodes 비트맵에 설정된 노드들인 zone 엔트리들에 대해 루프를 돌며 zone과 z(zoneref)를 반환한다.
/** * first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist * @zonelist - The zonelist to search for a suitable zone * @highest_zoneidx - The zone index of the highest zone to return * @nodes - An optional nodemask to filter the zonelist with * @zone - The first suitable zone found is returned via this parameter * * This function returns the first zone at or below a given zone index that is * within the allowed nodemask. The zoneref returned is a cursor that can be * used to iterate the zonelist with next_zones_zonelist by advancing it by * one before calling. */ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, enum zone_type highest_zoneidx, nodemask_t *nodes, struct zone **zone) { struct zoneref *z = next_zones_zonelist(zonelist->_zonerefs, highest_zoneidx, nodes); *zone = zonelist_zone(z); return z; }
zonelist에서 highest_zoneidx 이하의 zone이면서 nodes 비트맵에 설정된 노드들에서 가장 처음 발견된 zone을 반환한다.
아래 그림은 zonelist의 처음 부터 검색을 하여 ZONE_NORMAL 타입을 초과하지 않는 zone에 대해 적합한 zone을 알아온다.
next_zones_zonelist()
mm/mmzone.c
/** * next_zones_zonelist - Returns the next zone at or below highest_zoneidx within the allowed nodemask using a cursor within a zonelist as a starting point * @z - The cursor used as a starting point for the search * @highest_zoneidx - The zone index of the highest zone to return * @nodes - An optional nodemask to filter the zonelist with * * This function returns the next zone at or below a given zone index that is * within the allowed nodemask using a cursor as the starting point for the * search. The zoneref returned is a cursor that represents the current zone * being examined. It should be advanced by one before calling * next_zones_zonelist again. */ /* Returns the next zone at or below highest_zoneidx in a zonelist */ struct zoneref *next_zones_zonelist(struct zoneref *z, enum zone_type highest_zoneidx, nodemask_t *nodes) { /* * Find the next suitable zone to use for the allocation. * Only filter based on nodemask if it's set */ if (likely(nodes == NULL)) while (zonelist_zone_idx(z) > highest_zoneidx) z++; else while (zonelist_zone_idx(z) > highest_zoneidx || (z->zone && !zref_in_nodemask(z, nodes))) z++; return z; }
zonelists에서 처음 부터 검색하여 zone이 highest_zoneidx 이하의 zone인 경우 그 zone을 리턴하되 인수 nodes에 대해 아래와 같이 처리한다.
- 인수 nodes가 지정된 경우 zonelists의 각 zone은 nodes에 포함된 zone으로 한정한다.
- 인수 nodes가 지정되지 않은 경우 node에 대해 제한 없다.
- if (likely(nodes == NULL))
- 높은 확률로 nodes가 null인 경우
- while (zonelist_zone_idx(z) > highest_zoneidx) z++;
- zonelist에서 zone 타입이 highest_zoneidx를 초과하는 경우 다음 존을 계속 진행한다.
- while (zonelist_zone_idx(z) > highest_zoneidx || (z->zone && !zref_in_nodemask(z, nodes))) z++;
- zonelist에 zone 타입이 highest_zoneidx를 초과하거나 인수 nodes에 포함되어 있지 않은 경우 다음 존을 계속 진행한다.
아래 그림은 z 부터 검색을 하여 ZONE_NORMAL 타입을 초과하지 않는 zone에 대해 적합한 zone을 알아온다.
zonelist_zone()
include/linux/mmzone.h
static inline struct zone *zonelist_zone(struct zoneref *zoneref) { return zoneref->zone; }
zonelist에서 zoneref의 zone 을 리턴한다.
zonelist_zone()
include/linux/mmzone.h
static inline int zonelist_zone_idx(struct zoneref *zoneref) { return zoneref->zone_idx; }
zonelist에서 zoneref의 zone 타입을 리턴한다. (based 0)
zref_in_nodemask()
mm/mmzone.c
static inline int zref_in_nodemask(struct zoneref *zref, nodemask_t *nodes) { #ifdef CONFIG_NUMA return node_isset(zonelist_node_idx(zref), *nodes); #else return 1; #endif /* CONFIG_NUMA */ }
nodes 노드 비트맵에 zref 의 노드가 포함 여부를 리턴한다. NUMA 시스템이 아닌 경우는 항상 1을 리턴한다.
ZONE과 관련한 GFP
gfp_zone()
include/linux/gfp.h
static inline enum zone_type gfp_zone(gfp_t flags) { enum zone_type z; int bit = (__force int) (flags & GFP_ZONEMASK); z = (GFP_ZONE_TABLE >> (bit * ZONES_SHIFT)) & ((1 << ZONES_SHIFT) - 1); VM_BUG_ON((GFP_ZONE_BAD >> bit) & 1); return z; }
ZONE 비트 정보가 포함된 flags를 사용하여 zone_type (based 0)을 리턴한다.
- int bit = (__force int) (flags & GFP_ZONEMASK);
- ZONE에 대한 __GFP_**** 플래그를 추출한다.
- z = (GFP_ZONE_TABLE >> (bit * ZONES_SHIFT)) & ((1 << ZONES_SHIFT) – 1);
- zone type(based 0)을 알아와서 리턴한다.
/* * GFP_ZONE_TABLE is a word size bitstring that is used for looking up the * zone to use given the lowest 4 bits of gfp_t. Entries are ZONE_SHIFT long * and there are 16 of them to cover all possible combinations of * __GFP_DMA, __GFP_DMA32, __GFP_MOVABLE and __GFP_HIGHMEM. * * The zone fallback order is MOVABLE=>HIGHMEM=>NORMAL=>DMA32=>DMA. * But GFP_MOVABLE is not only a zone specifier but also an allocation * policy. Therefore __GFP_MOVABLE plus another zone selector is valid. * Only 1 bit of the lowest 3 bits (DMA,DMA32,HIGHMEM) can be set to "1". * * bit result * ================= * 0x0 => NORMAL * 0x1 => DMA or NORMAL * 0x2 => HIGHMEM or NORMAL * 0x3 => BAD (DMA+HIGHMEM) * 0x4 => DMA32 or DMA or NORMAL * 0x5 => BAD (DMA+DMA32) * 0x6 => BAD (HIGHMEM+DMA32) * 0x7 => BAD (HIGHMEM+DMA32+DMA) * 0x8 => NORMAL (MOVABLE+0) * 0x9 => DMA or NORMAL (MOVABLE+DMA) * 0xa => MOVABLE (Movable is valid only if HIGHMEM is set too) * 0xb => BAD (MOVABLE+HIGHMEM+DMA) * 0xc => DMA32 (MOVABLE+DMA32) * 0xd => BAD (MOVABLE+DMA32+DMA) * 0xe => BAD (MOVABLE+DMA32+HIGHMEM) * 0xf => BAD (MOVABLE+DMA32+HIGHMEM+DMA) * * ZONES_SHIFT must be <= 2 on 32 bit platforms. */ #if 16 * ZONES_SHIFT > BITS_PER_LONG #error ZONES_SHIFT too large to create GFP_ZONE_TABLE integer #endif #define GFP_ZONE_TABLE ( \ (ZONE_NORMAL << 0 * ZONES_SHIFT) \ | (OPT_ZONE_DMA << ___GFP_DMA * ZONES_SHIFT) \ | (OPT_ZONE_HIGHMEM << ___GFP_HIGHMEM * ZONES_SHIFT) \ | (OPT_ZONE_DMA32 << ___GFP_DMA32 * ZONES_SHIFT) \ | (ZONE_NORMAL << ___GFP_MOVABLE * ZONES_SHIFT) \ | (OPT_ZONE_DMA << (___GFP_MOVABLE | ___GFP_DMA) * ZONES_SHIFT) \ | (ZONE_MOVABLE << (___GFP_MOVABLE | ___GFP_HIGHMEM) * ZONES_SHIFT) \ | (OPT_ZONE_DMA32 << (___GFP_MOVABLE | ___GFP_DMA32) * ZONES_SHIFT) \ )
/* * GFP_ZONE_BAD is a bitmap for all combinations of __GFP_DMA, __GFP_DMA32 * __GFP_HIGHMEM and __GFP_MOVABLE that are not permitted. One flag per * entry starting with bit 0. Bit is set if the combination is not * allowed. */ #define GFP_ZONE_BAD ( \ 1 << (___GFP_DMA | ___GFP_HIGHMEM) \ | 1 << (___GFP_DMA | ___GFP_DMA32) \ | 1 << (___GFP_DMA32 | ___GFP_HIGHMEM) \ | 1 << (___GFP_DMA | ___GFP_DMA32 | ___GFP_HIGHMEM) \ | 1 << (___GFP_MOVABLE | ___GFP_HIGHMEM | ___GFP_DMA) \ | 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA) \ | 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_HIGHMEM) \ | 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA | ___GFP_HIGHMEM) \ )
ZONES_SHIFT
include/linux/page-flags-layout.h
/* * When a memory allocation must conform to specific limitations (such * as being suitable for DMA) the caller will pass in hints to the * allocator in the gfp_mask, in the zone modifier bits. These bits * are used to select a priority ordered list of memory zones which * match the requested limits. See gfp_zone() in include/linux/gfp.h */ #if MAX_NR_ZONES < 2 #define ZONES_SHIFT 0 #elif MAX_NR_ZONES <= 2 #define ZONES_SHIFT 1 #elif MAX_NR_ZONES <= 4 #define ZONES_SHIFT 2 #else #error ZONES_SHIFT -- too many zones configured adjust calculation #endif
zone 을 표현하기 위해 좌측 쉬프트를 해야 하는 비트 수
- zone이 1개인 경우 0 비트 (zone 비트가 필요 없음)
- zone이 2개인 경우 1 비트
- zone이 3~4개인 경우 2비트
- zone이 5개 이상인 경우 에러
gfpflags_to_migratetype()
include/linux/gfp.h
/* Convert GFP flags to their corresponding migrate type */ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) { WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); if (unlikely(page_group_by_mobility_disabled)) return MIGRATE_UNMOVABLE; /* Group based on mobility */ return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) | ((gfp_flags & __GFP_RECLAIMABLE) != 0); }
gfp_flags에서 migrate type을 알아내어 반환한다.
참고
- Bit Operations | 문c
- Bitmap Operations | 문c
- NODE 비트맵 (API) | 문c
- GFP 플래그 | 문c