では glibc-2.27 の posix_memalign の内部実装を見てみましょう。
__posix_memalign 関数(https://github.com/MacKomatsu/glibc/blob/release/2.27/master/malloc/malloc.c).
5354 /* We need a wrapper function for one of the additions of POSIX. */ 5355 int 5356 __posix_memalign (void **memptr, size_t alignment, size_t size) 5357 { 5358 void *mem; 5359 5360 /* Test whether the SIZE argument is valid. It must be a power of 5361 two multiple of sizeof (void *). */ 5362 if (alignment % sizeof (void *) != 0 5363 || !powerof2 (alignment / sizeof (void *)) 5364 || alignment == 0) 5365 return EINVAL; 5366 5367 5368 void *address = RETURN_ADDRESS (0); 5369 mem = _mid_memalign (alignment, size, address); 5370 5371 if (mem != NULL) 5372 { 5373 *memptr = mem; 5374 return 0; 5375 } 5376 5377 return ENOMEM; 5378 } 5379 weak_alias (__posix_memalign, posix_memalign)
__posix_memalign() がベースとなる関数ですが、この関数は _mid_memalign() 関数をコールしていますね。
5369 mem = _mid_memalign (alignment, size, address);
aligned_alloc() の実装も同様に _mid_memalign() 関数をコールします。
__libc_memalign 関数(https://github.com/MacKomatsu/glibc/blob/release/2.27/master/malloc/malloc.c).
3252 void * 3253 __libc_memalign (size_t alignment, size_t bytes) 3254 { 3255 void *address = RETURN_ADDRESS (0); 3256 return _mid_memalign (alignment, bytes, address); 3257 } //中略 3329 /* For ISO C11. */ 3330 weak_alias (__libc_memalign, aligned_alloc) 3331 libc_hidden_def (__libc_memalign) //中略 5580 strong_alias (__libc_memalign, __memalign) 5581 weak_alias (__libc_memalign, memalign)
では _mid_memalign() 関数の実装です。
_mid_memalign 関数(https://github.com/MacKomatsu/glibc/blob/release/2.27/master/malloc/malloc.c).
3259 static void * 3260 _mid_memalign (size_t alignment, size_t bytes, void *address) 3261 { 3262 mstate ar_ptr; 3263 void *p; 3264 3265 void *(*hook) (size_t, size_t, const void *) = 3266 atomic_forced_read (__memalign_hook); 3267 if (__builtin_expect (hook != NULL, 0)) 3268 return (*hook)(alignment, bytes, address); 3269 3270 /* If we need less alignment than we give anyway, just relay to malloc. */ 3271 if (alignment <= MALLOC_ALIGNMENT) 3272 return __libc_malloc (bytes); 3273 3274 /* Otherwise, ensure that it is at least a minimum chunk size */ 3275 if (alignment < MINSIZE) 3276 alignment = MINSIZE; 3277 3278 /* If the alignment is greater than SIZE_MAX / 2 + 1 it cannot be a 3279 power of 2 and will cause overflow in the check below. */ 3280 if (alignment > SIZE_MAX / 2 + 1) 3281 { 3282 __set_errno (EINVAL); 3283 return 0; 3284 } 3285 3286 /* Check for overflow. */ 3287 if (bytes > SIZE_MAX - alignment - MINSIZE) 3288 { 3289 __set_errno (ENOMEM); 3290 return 0; 3291 } 3292 3293 3294 /* Make sure alignment is power of 2. */ 3295 if (!powerof2 (alignment)) 3296 { 3297 size_t a = MALLOC_ALIGNMENT * 2; 3298 while (a < alignment) 3299 a <<= 1; 3300 alignment = a; 3301 } 3302 3303 if (SINGLE_THREAD_P) 3304 { 3305 p = _int_memalign (&main_arena, alignment, bytes); 3306 assert (!p || chunk_is_mmapped (mem2chunk (p)) || 3307 &main_arena == arena_for_chunk (mem2chunk (p))); 3308 3309 return p; 3310 } 3311 3312 arena_get (ar_ptr, bytes + alignment + MINSIZE); 3313 3314 p = _int_memalign (ar_ptr, alignment, bytes); 3315 if (!p && ar_ptr != NULL) 3316 { 3317 LIBC_PROBE (memory_memalign_retry, 2, bytes, alignment); 3318 ar_ptr = arena_get_retry (ar_ptr, bytes); 3319 p = _int_memalign (ar_ptr, alignment, bytes); 3320 } 3321 3322 if (ar_ptr != NULL) 3323 __libc_lock_unlock (ar_ptr->mutex); 3324 3325 assert (!p || chunk_is_mmapped (mem2chunk (p)) || 3326 ar_ptr == arena_for_chunk (mem2chunk (p))); 3327 return p; 3328 }
この中で注目したいのは MALLOC_ALIGNMENT 以下のアラインメントサイズであれば __libc_malloc() で割り当てを行うということです。
3270 /* If we need less alignment than we give anyway, just relay to malloc. */ 3271 if (alignment <= MALLOC_ALIGNMENT) 3272 return __libc_malloc (bytes);
この _mid_memalign() 関数は __libc_malloc() 関数をコールしてます。
さらに _mid_memalign() 関数は _int_memalign() 関数もコールしてます。
3312 arena_get (ar_ptr, bytes + alignment + MINSIZE); 3313 3314 p = _int_memalign (ar_ptr, alignment, bytes); 3315 if (!p && ar_ptr != NULL) 3316 { 3317 LIBC_PROBE (memory_memalign_retry, 2, bytes, alignment); 3318 ar_ptr = arena_get_retry (ar_ptr, bytes); 3319 p = _int_memalign (ar_ptr, alignment, bytes); 3320 }
リクエストされたアラインメントサイズが MALLOC_ALIGNMENT 以下の数値であれば __libc_malloc() 関数が呼び出され、そうでなければ代わりに _int_memalign() 関数がコールされるってな感じです。
てなことで __libc_malloc() と _int_memalign() 関数の中身もチェックしてみましょう。
__libc_malloc 関数(https://github.com/MacKomatsu/glibc/blob/release/2.27/master/malloc/malloc.c).
3026 void * 3027 __libc_malloc (size_t bytes) 3028 { 3029 mstate ar_ptr; 3030 void *victim; 3031 3032 void *(*hook) (size_t, const void *) 3033 = atomic_forced_read (__malloc_hook); 3034 if (__builtin_expect (hook != NULL, 0)) 3035 return (*hook)(bytes, RETURN_ADDRESS (0)); 3036 #if USE_TCACHE 3037 /* int_free also calls request2size, be careful to not pad twice. */ 3038 size_t tbytes; 3039 checked_request2size (bytes, tbytes); 3040 size_t tc_idx = csize2tidx (tbytes); 3041 3042 MAYBE_INIT_TCACHE (); 3043 3044 DIAG_PUSH_NEEDS_COMMENT; 3045 if (tc_idx < mp_.tcache_bins 3046 /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */ 3047 && tcache 3048 && tcache->entries[tc_idx] != NULL) 3049 { 3050 return tcache_get (tc_idx); 3051 } 3052 DIAG_POP_NEEDS_COMMENT; 3053 #endif 3054 3055 if (SINGLE_THREAD_P) 3056 { 3057 victim = _int_malloc (&main_arena, bytes); 3058 assert (!victim || chunk_is_mmapped (mem2chunk (victim)) || 3059 &main_arena == arena_for_chunk (mem2chunk (victim))); 3060 return victim; 3061 } 3062 3063 arena_get (ar_ptr, bytes); 3064 3065 victim = _int_malloc (ar_ptr, bytes); 3066 /* Retry with another arena only if we were able to find a usable arena 3067 before. */ 3068 if (!victim && ar_ptr != NULL) 3069 { 3070 LIBC_PROBE (memory_malloc_retry, 1, bytes); 3071 ar_ptr = arena_get_retry (ar_ptr, bytes); 3072 victim = _int_malloc (ar_ptr, bytes); 3073 } 3074 3075 if (ar_ptr != NULL) 3076 __libc_lock_unlock (ar_ptr->mutex); 3077 3078 assert (!victim || chunk_is_mmapped (mem2chunk (victim)) || 3079 ar_ptr == arena_for_chunk (mem2chunk (victim))); 3080 return victim; 3081 }
__libc_malloc() 関数は _int_malloc() 関数をコールしてますね。
3063 arena_get (ar_ptr, bytes); 3064 3065 victim = _int_malloc (ar_ptr, bytes); 3066 /* Retry with another arena only if we were able to find a usable arena 3067 before. */ 3068 if (!victim && ar_ptr != NULL) 3069 { 3070 LIBC_PROBE (memory_malloc_retry, 1, bytes); 3071 ar_ptr = arena_get_retry (ar_ptr, bytes); 3072 victim = _int_malloc (ar_ptr, bytes); 3073 }
_int_malloc() 関数の宣言は以下のようになっています。
static void* _int_malloc(mstate, size_t);
この __libc_malloc() と _int_malloc() 関数は malloc の内部実装に該当します。
つまりリクエストされたアラインメントサイズが MALLOC_ALIGNMENT 以下の数値であればデフォルトのアラインメントが適用されます。
筆者の開発環境では 16 バイト境界がデフォルトになります。
そして _int_memalign() も同様に _int_malloc() 関数をコールします。
_int_memalign 関数(https://github.com/MacKomatsu/glibc/blob/release/2.27/master/malloc/malloc.c).
4661 static void * 4662 _int_memalign (mstate av, size_t alignment, size_t bytes) 4663 { 4664 INTERNAL_SIZE_T nb; /* padded request size */ 4665 char *m; /* memory returned by malloc call */ 4666 mchunkptr p; /* corresponding chunk */ 4667 char *brk; /* alignment point within p */ 4668 mchunkptr newp; /* chunk to return */ 4669 INTERNAL_SIZE_T newsize; /* its size */ 4670 INTERNAL_SIZE_T leadsize; /* leading space before alignment point */ 4671 mchunkptr remainder; /* spare room at end to split off */ 4672 unsigned long remainder_size; /* its size */ 4673 INTERNAL_SIZE_T size; 4674 4675 4676 4677 checked_request2size (bytes, nb); 4678 4679 /* 4680 Strategy: find a spot within that chunk that meets the alignment 4681 request, and then possibly free the leading and trailing space. 4682 */ 4683 4684 4685 /* Check for overflow. */ 4686 if (nb > SIZE_MAX - alignment - MINSIZE) 4687 { 4688 __set_errno (ENOMEM); 4689 return 0; 4690 } 4691 4692 /* Call malloc with worst case padding to hit alignment. */ 4693 4694 m = (char *) (_int_malloc (av, nb + alignment + MINSIZE)); 4695 4696 if (m == 0) 4697 return 0; /* propagate failure */ 4698 4699 p = mem2chunk (m); 4700 4701 if ((((unsigned long) (m)) % alignment) != 0) /* misaligned */ 4702 4703 { /* 4704 Find an aligned spot inside chunk. Since we need to give back 4705 leading space in a chunk of at least MINSIZE, if the first 4706 calculation places us at a spot with less than MINSIZE leader, 4707 we can move to the next aligned spot -- we've allocated enough 4708 total room so that this is always possible. 
4709 */ 4710 brk = (char *) mem2chunk (((unsigned long) (m + alignment - 1)) & 4711 - ((signed long) alignment)); 4712 if ((unsigned long) (brk - (char *) (p)) < MINSIZE) 4713 brk += alignment; 4714 4715 newp = (mchunkptr) brk; 4716 leadsize = brk - (char *) (p); 4717 newsize = chunksize (p) - leadsize; 4718 4719 /* For mmapped chunks, just adjust offset */ 4720 if (chunk_is_mmapped (p)) 4721 { 4722 set_prev_size (newp, prev_size (p) + leadsize); 4723 set_head (newp, newsize | IS_MMAPPED); 4724 return chunk2mem (newp); 4725 } 4726 4727 /* Otherwise, give back leader, use the rest */ 4728 set_head (newp, newsize | PREV_INUSE | 4729 (av != &main_arena ? NON_MAIN_ARENA : 0)); 4730 set_inuse_bit_at_offset (newp, newsize); 4731 set_head_size (p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0)); 4732 _int_free (av, p, 1); 4733 p = newp; 4734 4735 assert (newsize >= nb && 4736 (((unsigned long) (chunk2mem (p))) % alignment) == 0); 4737 } 4738 4739 /* Also give back spare room at the end */ 4740 if (!chunk_is_mmapped (p)) 4741 { 4742 size = chunksize (p); 4743 if ((unsigned long) (size) > (unsigned long) (nb + MINSIZE)) 4744 { 4745 remainder_size = size - nb; 4746 remainder = chunk_at_offset (p, nb); 4747 set_head (remainder, remainder_size | PREV_INUSE | 4748 (av != &main_arena ? NON_MAIN_ARENA : 0)); 4749 set_head_size (p, nb); 4750 _int_free (av, remainder, 1); 4751 } 4752 } 4753 4754 check_inuse_chunk (av, p); 4755 return chunk2mem (p); 4756 }
この中で注目したいのはメモリーの割り当て時に、パディングされたバイトサイズとなる nb に対してアラインメントと MINSIZE が追加されることです。
4692 /* Call malloc with worst case padding to hit alignment. */ 4693 4694 m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
nb に alignment と MINSIZE を余分に加算して確保しておくことで、確保したチャンクの中に必ずアラインメントされた位置を取れるようになるのが分かると思います。
MINSIZE については以下のように計算されます。
https://github.com/MacKomatsu/glibc/blob/release/2.27/master/malloc/malloc.c.
1190 /* The smallest possible chunk */ 1191 #define MIN_CHUNK_SIZE (offsetof(struct malloc_chunk, fd_nextsize)) 1192 1193 /* The smallest size we can malloc is an aligned minimal chunk */ 1194 1195 #define MINSIZE \ 1196 (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
MIN_CHUNK_SIZE は struct malloc_chunk 構造体のメンバー fd_nextsize までのオフセットバイトを取得したものです。
参考までに以下が malloc_chunk の定義となります。
https://github.com/MacKomatsu/glibc/blob/release/2.27/master/malloc/malloc.c.
1060 struct malloc_chunk { 1061 1062 INTERNAL_SIZE_T mchunk_prev_size; /* Size of previous chunk (if free). */ 1063 INTERNAL_SIZE_T mchunk_size; /* Size in bytes, including overhead. */ 1064 1065 struct malloc_chunk* fd; /* double links -- used only if free. */ 1066 struct malloc_chunk* bk; 1067 1068 /* Only used for large blocks: pointer to next larger size. */ 1069 struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */ 1070 struct malloc_chunk* bk_nextsize; 1071 };
MINSIZE マクロは分かりにくいので、簡単なアラインメントをするコードを次の項目で考えてみましょう。
Copyright 2018-2019, by Masaki Komatsu