commit b1cb0982bdd6f57fed690f796659733350bb2cae
Author: Joonsoo Kim <[email protected]>
Date: Thu Oct 24 10:07:45 2013 +0900
slab: change the management method of free objects of the slab
In short, the commit turns the free-object "linked list" into a "stack", with page->active marking the top of the stack.
The table below shows how the page->freelist[] array changes when objects are freed in the order 2 -> 3 -> 0 -> 1 (the element modified by each put is shown in bold).
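A worked reconstruction of that sequence, assuming a slab of num = 4 objects that are all currently allocated (page->active = 4) and whose unshuffled freelist was initialized to 0, 1, 2, 3 by set_free_obj(page, i, i):

| operation | page->active | freelist[0] | freelist[1] | freelist[2] | freelist[3] |
|---|---|---|---|---|---|
| initial (all objects allocated) | 4 | 0 | 1 | 2 | 3 |
| put object 2 | 3 | 0 | 1 | 2 | **2** |
| put object 3 | 2 | 0 | 1 | **3** | 2 |
| put object 0 | 1 | 0 | **0** | 3 | 2 |
| put object 1 | 0 | **1** | 0 | 3 | 2 |

Entries at indices >= page->active are the indices of free objects, and freelist[page->active] is the next one slab_get_obj() will hand out.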
/*
 * Get the memory for a slab management obj.
 *
 * For a slab cache when the slab descriptor is off-slab, the
 * slab descriptor can't come from the same cache which is being created,
 * Because if it is the case, that means we defer the creation of
 * the kmalloc_{dma,}_cache of size sizeof(slab descriptor) to this point.
 * And we eventually call down to __kmem_cache_create(), which
 * in turn looks up in the kmalloc_{dma,}_caches for the disired-size one.
 * This is a "chicken-and-egg" problem.
 *
 * So the off-slab slab descriptor shall come from the kmalloc_{dma,}_caches,
 * which are all initialized during kmem_cache_init().
 */
static void *alloc_slabmgmt(struct kmem_cache *cachep,
			    struct page *page, int colour_off,
			    gfp_t local_flags, int nodeid)
{
	void *freelist;
	void *addr = page_address(page);	/* virtual address of the memory managed by this struct page */

	/* the slab's management information is kept in struct page */
	page->s_mem = addr + colour_off;	/* objects start after the colouring area; s_mem points at them */
	page->active = 0;

	if (OBJFREELIST_SLAB(cachep))
		/*
		 * freelist array lives inside a free object: leave the
		 * pointer NULL for now, cache_init_objs() sets it up later
		 */
		freelist = NULL;
	else if (OFF_SLAB(cachep)) {
		/* Slab management obj is off-slab. */
		freelist = kmem_cache_alloc_node(cachep->freelist_cache,
						 local_flags, nodeid);
		if (!freelist)
			return NULL;
	} else {
		/*
		 * freelist array larger than one object: step back
		 * freelist_size bytes from the end of the allocated pages
		 * and use that as the freelist pointer
		 */
		/* We will use last bytes at the slab for freelist */
		freelist = addr + (PAGE_SIZE << cachep->gfporder) -
				cachep->freelist_size;
	}

	return freelist;
}
...
static void cache_init_objs(struct kmem_cache *cachep,
			    struct page *page)
{
	int i;
	void *objp;
	bool shuffled;

	cache_init_objs_debug(cachep, page);

	/* Try to randomize the freelist if enabled */
	shuffled = shuffle_freelist(cachep, page);

	if (!shuffled && OBJFREELIST_SLAB(cachep)) {
		/*
		 * index_to_obj(cachep, page, cachep->num - 1) is the last
		 * object in the slab.  obj_offset(cachep) is the offset of
		 * the object payload; with slab debugging enabled it skips
		 * the padding and red zone.  So for caches whose freelist
		 * array lives inside a free object, the freelist pointer is
		 * set to the start of the last object.
		 */
		page->freelist = index_to_obj(cachep, page, cachep->num - 1) +
						obj_offset(cachep);
	}

	for (i = 0; i < cachep->num; i++) {
		objp = index_to_obj(cachep, page, i);
		objp = kasan_init_slab_obj(cachep, objp);

		/* constructor could break poison info */
		if (DEBUG == 0 && cachep->ctor) {
			kasan_unpoison_object_data(cachep, objp);
			cachep->ctor(objp);
			kasan_poison_object_data(cachep, objp);
		}

		if (!shuffled)
			set_free_obj(page, i, i);	/* initialize the freelist array */
	}
}
static void *slab_get_obj(struct kmem_cache *cachep, struct page *page)
{
	void *objp;

	objp = index_to_obj(cachep, page, get_free_obj(page, page->active));
	page->active++;

	/* just return the object; moving the slab between lists is left to fixup_slab_list() */
	return objp;
}
static void slab_put_obj(struct kmem_cache *cachep,
			 struct page *page, void *objp)
{
	unsigned int objnr = obj_to_index(cachep, page, objp);
#if DEBUG
	unsigned int i;

	/* Verify double free bug */
	for (i = page->active; i < cachep->num; i++) {
		if (get_free_obj(page, i) == objnr) {
			pr_err("slab: double free detected in cache '%s', objp %px\n",
			       cachep->name, objp);
			BUG();
		}
	}
#endif
	page->active--;
	if (!page->freelist)
		/*
		 * only an OBJFREELIST_SLAB slab can have a NULL freelist;
		 * in that case objp is the last free object, so it is used
		 * to hold the freelist array
		 */
		page->freelist = objp + obj_offset(cachep);

	set_free_obj(page, page->active, objnr);
}
/*
 * Map pages beginning at addr to the given cache and slab. This is required
 * for the slab allocator to be able to lookup the cache and slab of a
 * virtual address for kfree, ksize, and slab debugging.
 */
static void slab_map_pages(struct kmem_cache *cache, struct page *page,
			   void *freelist)
{
	page->slab_cache = cache;
	page->freelist = freelist;	/* still NULL here for an OBJFREELIST_SLAB slab */
}
/*
 * Grow (by 1) the number of slabs within a cache.  This is called by
 * kmem_cache_alloc() when there are no active objs left in a cache.
 */
static struct page *cache_grow_begin(struct kmem_cache *cachep,
				     gfp_t flags, int nodeid)
{
	void *freelist;
	size_t offset;
	gfp_t local_flags;
	int page_node;
	struct kmem_cache_node *n;
	struct page *page;
	...
	/* Get slab management. */
	freelist = alloc_slabmgmt(cachep, page, offset,
			local_flags & ~GFP_CONSTRAINT_MASK, page_node);
	/*
	 * when the management data is off-slab, the freelist was allocated
	 * separately; NULL means that allocation failed
	 */
	if (OFF_SLAB(cachep) && !freelist)
		goto opps1;

	slab_map_pages(cachep, page, freelist);

	cache_init_objs(cachep, page);
	...
	return page;

opps1:
	...
	return NULL;
}
...
/*
 * slab_get_obj() usually appears together with fixup_slab_list(), which
 * decides whether the slab needs to be moved to the full or partial list
 */
static inline void fixup_slab_list(struct kmem_cache *cachep,
				struct kmem_cache_node *n, struct page *page,
				void **list)
{
	/* move slabp to correct slabp list: */
	list_del(&page->slab_list);
	if (page->active == cachep->num) {
		list_add(&page->slab_list, &n->slabs_full);
		if (OBJFREELIST_SLAB(cachep)) {
#if DEBUG
			...
#endif
			/*
			 * an OBJFREELIST_SLAB slab sets its freelist to NULL
			 * when it becomes full, which matches the check in
			 * slab_put_obj()
			 */
			page->freelist = NULL;
		}
	} else
		list_add(&page->slab_list, &n->slabs_partial);
}
The three slab lists and the per-CPU array_cache
mm/slab.h
/*
 * struct array_cache
 *
 * Purpose:
 * - LIFO ordering, to hand out cache-warm objects from _alloc
 * - reduce the number of linked list operations
 * - reduce spinlock operations
 *
 * The limit is stored in the per-cpu structure to reduce the data cache
 * footprint.
 *
 */
struct array_cache {
	unsigned int avail;
	unsigned int limit;
	unsigned int batchcount;
	unsigned int touched;
	void *entry[];	/*
			 * Must have this definition in here for the proper
			 * alignment of array_cache. Also simplifies accessing
			 * the entries.
			 */
};
...
/*
 * The slab lists for all objects.
 */
struct kmem_cache_node {
	spinlock_t list_lock;

#ifdef CONFIG_SLAB
	struct list_head slabs_partial;	/* partial list first, better asm code */
	struct list_head slabs_full;
	struct list_head slabs_free;
	unsigned long free_objects;
	unsigned int free_limit;
	unsigned int colour_next;	/* Per-node cache coloring */
	struct array_cache *shared;	/* shared per node */
	struct alien_cache **alien;	/* on other nodes */
	unsigned long next_reap;	/* updated without locking */
	int free_touched;		/* updated without locking */
#endif

#ifdef CONFIG_SLUB
	unsigned long nr_partial;
	struct list_head partial;
#ifdef CONFIG_SLUB_DEBUG
	atomic_long_t nr_slabs;
	atomic_long_t total_objects;
	struct list_head full;
#endif
#endif
};
...
enum pageflags {
	PG_locked,		/* Page is locked. Don't touch. */
	PG_error,
	PG_referenced,
	PG_uptodate,
	PG_dirty,
	PG_lru,
	PG_active,
	PG_slab,
	...
};
...
static inline struct page *compound_head(struct page *page)
{
	unsigned long head = READ_ONCE(page->compound_head);

	if (unlikely(head & 1))
		return (struct page *) (head - 1);
	return page;
}

static __always_inline int PageTail(struct page *page)
{
	return READ_ONCE(page->compound_head) & 1;
}
...
/*
 * Page flags policies wrt compound pages
 *
 * PF_ANY:
 *     the page flag is relevant for small, head and tail pages.
 *
 * PF_HEAD:
 *     for compound page all operations related to the page flag applied to
 *     head page.
 *
 * PF_NO_TAIL:
 *     modifications of the page flag must be done on small or head pages,
 *     checks can be done on tail pages too.
 *
 * PF_NO_COMPOUND:
 *     the page flag is not relevant for compound pages.
 */
#define PF_ANY(page, enforce)	page
#define PF_HEAD(page, enforce)	compound_head(page)
#define PF_NO_TAIL(page, enforce) ({					\
		VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page);	\
		compound_head(page);})
#define PF_NO_COMPOUND(page, enforce) ({				\
		VM_BUG_ON_PGFLAGS(enforce && PageCompound(page), page);	\
		page;})

/*
 * Macros to create function definitions for page flags
 */
#define TESTPAGEFLAG(uname, lname, policy)				\
static __always_inline int Page##uname(struct page *page)		\
	{ return test_bit(PG_##lname, &policy(page, 0)->flags); }
...
#define __SETPAGEFLAG(uname, lname, policy)				\
static __always_inline void __SetPage##uname(struct page *page)	\
	{ __set_bit(PG_##lname, &policy(page, 1)->flags); }

#define __CLEARPAGEFLAG(uname, lname, policy)				\
static __always_inline void __ClearPage##uname(struct page *page)	\
	{ __clear_bit(PG_##lname, &policy(page, 1)->flags); }
...
#define __PAGEFLAG(uname, lname, policy)				\
	TESTPAGEFLAG(uname, lname, policy)				\
	__SETPAGEFLAG(uname, lname, policy)				\
	__CLEARPAGEFLAG(uname, lname, policy)
...
__PAGEFLAG(Slab, slab, PF_NO_TAIL)
Function definitions after the macros are expanded
/* arch/x86/include/asm/bitops.h */
/**
 * __set_bit - Set a bit in memory
 * @nr: the bit to set
 * @addr: the address to start counting from
 *
 * Unlike set_bit(), this function is non-atomic and may be reordered.
 * If it's called on the same region of memory simultaneously, the effect
 * may be that only one operation succeeds.
 */
static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
{
	asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
}

static __always_inline void __SetPageSlab(struct page *page)
{
	__set_bit(PG_slab, &({
		do {
			if (unlikely(1 && PageTail(page))) {
				dump_page(page, "VM_BUG_ON_PAGE(" __stringify(1 && PageTail(page)) ")");
				BUG();
			}
		} while (0);
		compound_head(page); })->flags);
}
...
The slab allocator interface
kmem_cache_create() creates a new cache.
SLAB_HWCACHE_ALIGN: the slab layer aligns every object in a slab to a hardware cache line. This can improve performance, but it increases memory overhead, trading space for time.
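As a usage sketch (the object type, cache name, and module boilerplate below are illustrative only, not taken from the kernel code quoted above), a cache-line-aligned cache could be created like this:

```c
#include <linux/module.h>
#include <linux/slab.h>

/* hypothetical object type, used only for illustration */
struct my_obj {
	int id;
	char payload[60];
};

static struct kmem_cache *my_cachep;

static int __init my_obj_cache_init(void)
{
	/*
	 * SLAB_HWCACHE_ALIGN aligns each object to a hardware cache line;
	 * align is 0 (let the allocator decide) and no constructor is passed.
	 */
	my_cachep = kmem_cache_create("my_obj_cache", sizeof(struct my_obj),
				      0, SLAB_HWCACHE_ALIGN, NULL);
	if (!my_cachep)
		return -ENOMEM;
	return 0;
}

static void __exit my_obj_cache_exit(void)
{
	kmem_cache_destroy(my_cachep);
}

module_init(my_obj_cache_init);
module_exit(my_obj_cache_exit);
MODULE_LICENSE("GPL");
```

Objects would then be obtained with kmem_cache_alloc(my_cachep, GFP_KERNEL) and returned with kmem_cache_free(my_cachep, objp).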