では glibc-2.27 の posix_memalign の内部実装を見てみましょう。
__posix_memalign 関数(https://github.com/MacKomatsu/glibc/blob/release/2.27/master/malloc/malloc.c).
5354 /* We need a wrapper function for one of the additions of POSIX. */
5355 int
5356 __posix_memalign (void **memptr, size_t alignment, size_t size)
5357 {
5358 void *mem;
5359
5360 /* Test whether the SIZE argument is valid. It must be a power of
5361 two multiple of sizeof (void *). */
5362 if (alignment % sizeof (void *) != 0
5363 || !powerof2 (alignment / sizeof (void *))
5364 || alignment == 0)
5365 return EINVAL;
5366
5367
5368 void *address = RETURN_ADDRESS (0);
5369 mem = _mid_memalign (alignment, size, address);
5370
5371 if (mem != NULL)
5372 {
5373 *memptr = mem;
5374 return 0;
5375 }
5376
5377 return ENOMEM;
5378 }
5379 weak_alias (__posix_memalign, posix_memalign)
__posix_memalign() がベースとなる関数ですが、この関数は _mid_memalign() 関数をコールしていますね。
5369 mem = _mid_memalign (alignment, size, address);
aligned_alloc() の実装も同様に _mid_memalign() 関数をコールします。
__libc_memalign 関数(https://github.com/MacKomatsu/glibc/blob/release/2.27/master/malloc/malloc.c).
3252 void *
3253 __libc_memalign (size_t alignment, size_t bytes)
3254 {
3255 void *address = RETURN_ADDRESS (0);
3256 return _mid_memalign (alignment, bytes, address);
3257 }
//中略
3329 /* For ISO C11. */
3330 weak_alias (__libc_memalign, aligned_alloc)
3331 libc_hidden_def (__libc_memalign)
//中略
5580 strong_alias (__libc_memalign, __memalign)
5581 weak_alias (__libc_memalign, memalign)
では _mid_memalign() 関数の実装です。
_mid_memalign 関数(https://github.com/MacKomatsu/glibc/blob/release/2.27/master/malloc/malloc.c).
3259 static void *
3260 _mid_memalign (size_t alignment, size_t bytes, void *address)
3261 {
3262 mstate ar_ptr;
3263 void *p;
3264
3265 void *(*hook) (size_t, size_t, const void *) =
3266 atomic_forced_read (__memalign_hook);
3267 if (__builtin_expect (hook != NULL, 0))
3268 return (*hook)(alignment, bytes, address);
3269
3270 /* If we need less alignment than we give anyway, just relay to malloc. */
3271 if (alignment <= MALLOC_ALIGNMENT)
3272 return __libc_malloc (bytes);
3273
3274 /* Otherwise, ensure that it is at least a minimum chunk size */
3275 if (alignment < MINSIZE)
3276 alignment = MINSIZE;
3277
3278 /* If the alignment is greater than SIZE_MAX / 2 + 1 it cannot be a
3279 power of 2 and will cause overflow in the check below. */
3280 if (alignment > SIZE_MAX / 2 + 1)
3281 {
3282 __set_errno (EINVAL);
3283 return 0;
3284 }
3285
3286 /* Check for overflow. */
3287 if (bytes > SIZE_MAX - alignment - MINSIZE)
3288 {
3289 __set_errno (ENOMEM);
3290 return 0;
3291 }
3292
3293
3294 /* Make sure alignment is power of 2. */
3295 if (!powerof2 (alignment))
3296 {
3297 size_t a = MALLOC_ALIGNMENT * 2;
3298 while (a < alignment)
3299 a <<= 1;
3300 alignment = a;
3301 }
3302
3303 if (SINGLE_THREAD_P)
3304 {
3305 p = _int_memalign (&main_arena, alignment, bytes);
3306 assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
3307 &main_arena == arena_for_chunk (mem2chunk (p)));
3308
3309 return p;
3310 }
3311
3312 arena_get (ar_ptr, bytes + alignment + MINSIZE);
3313
3314 p = _int_memalign (ar_ptr, alignment, bytes);
3315 if (!p && ar_ptr != NULL)
3316 {
3317 LIBC_PROBE (memory_memalign_retry, 2, bytes, alignment);
3318 ar_ptr = arena_get_retry (ar_ptr, bytes);
3319 p = _int_memalign (ar_ptr, alignment, bytes);
3320 }
3321
3322 if (ar_ptr != NULL)
3323 __libc_lock_unlock (ar_ptr->mutex);
3324
3325 assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
3326 ar_ptr == arena_for_chunk (mem2chunk (p)));
3327 return p;
3328 }
この中で注目したいのは MALLOC_ALIGNMENT 以下のアラインメントサイズであれば __libc_malloc() で割り当てを行うということです。
3270 /* If we need less alignment than we give anyway, just relay to malloc. */
3271 if (alignment <= MALLOC_ALIGNMENT)
3272 return __libc_malloc (bytes);
この _mid_memalign() 関数は __libc_malloc() 関数をコールしてます。
さらに _mid_memalign() 関数は _int_memalign() 関数もコールしてます。
3312 arena_get (ar_ptr, bytes + alignment + MINSIZE);
3313
3314 p = _int_memalign (ar_ptr, alignment, bytes);
3315 if (!p && ar_ptr != NULL)
3316 {
3317 LIBC_PROBE (memory_memalign_retry, 2, bytes, alignment);
3318 ar_ptr = arena_get_retry (ar_ptr, bytes);
3319 p = _int_memalign (ar_ptr, alignment, bytes);
3320 }
リクエストされたアラインメントサイズが MALLOC_ALIGNMENT 以下の数値であれば __libc_malloc() 関数が呼び出され、そうでなければ代わりに _int_memalign() 関数がコールされるってな感じです。
てなことで __libc_malloc() と _int_memalign() 関数の中身もチェックしてみましょう。
__libc_malloc 関数(https://github.com/MacKomatsu/glibc/blob/release/2.27/master/malloc/malloc.c).
3026 void *
3027 __libc_malloc (size_t bytes)
3028 {
3029 mstate ar_ptr;
3030 void *victim;
3031
3032 void *(*hook) (size_t, const void *)
3033 = atomic_forced_read (__malloc_hook);
3034 if (__builtin_expect (hook != NULL, 0))
3035 return (*hook)(bytes, RETURN_ADDRESS (0));
3036 #if USE_TCACHE
3037 /* int_free also calls request2size, be careful to not pad twice. */
3038 size_t tbytes;
3039 checked_request2size (bytes, tbytes);
3040 size_t tc_idx = csize2tidx (tbytes);
3041
3042 MAYBE_INIT_TCACHE ();
3043
3044 DIAG_PUSH_NEEDS_COMMENT;
3045 if (tc_idx < mp_.tcache_bins
3046 /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
3047 && tcache
3048 && tcache->entries[tc_idx] != NULL)
3049 {
3050 return tcache_get (tc_idx);
3051 }
3052 DIAG_POP_NEEDS_COMMENT;
3053 #endif
3054
3055 if (SINGLE_THREAD_P)
3056 {
3057 victim = _int_malloc (&main_arena, bytes);
3058 assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
3059 &main_arena == arena_for_chunk (mem2chunk (victim)));
3060 return victim;
3061 }
3062
3063 arena_get (ar_ptr, bytes);
3064
3065 victim = _int_malloc (ar_ptr, bytes);
3066 /* Retry with another arena only if we were able to find a usable arena
3067 before. */
3068 if (!victim && ar_ptr != NULL)
3069 {
3070 LIBC_PROBE (memory_malloc_retry, 1, bytes);
3071 ar_ptr = arena_get_retry (ar_ptr, bytes);
3072 victim = _int_malloc (ar_ptr, bytes);
3073 }
3074
3075 if (ar_ptr != NULL)
3076 __libc_lock_unlock (ar_ptr->mutex);
3077
3078 assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
3079 ar_ptr == arena_for_chunk (mem2chunk (victim)));
3080 return victim;
3081 }
__libc_malloc() 関数は _int_malloc() 関数をコールしてますね。
3063 arena_get (ar_ptr, bytes);
3064
3065 victim = _int_malloc (ar_ptr, bytes);
3066 /* Retry with another arena only if we were able to find a usable arena
3067 before. */
3068 if (!victim && ar_ptr != NULL)
3069 {
3070 LIBC_PROBE (memory_malloc_retry, 1, bytes);
3071 ar_ptr = arena_get_retry (ar_ptr, bytes);
3072 victim = _int_malloc (ar_ptr, bytes);
3073 }
_int_malloc() 関数の宣言は以下のようになっています。
static void* _int_malloc(mstate, size_t);
この __libc_malloc() と _int_malloc() 関数は malloc の内部実装に該当します。
つまりリクエストされたアラインメントサイズが MALLOC_ALIGNMENT 以下の数値であればデフォルトのアラインメントが適用されます。
筆者の開発環境では 16 バイト境界がデフォルトになります。
そして _int_memalign() も同様に _int_malloc() 関数をコールします。
_int_memalign 関数(https://github.com/MacKomatsu/glibc/blob/release/2.27/master/malloc/malloc.c).
4661 static void *
4662 _int_memalign (mstate av, size_t alignment, size_t bytes)
4663 {
4664 INTERNAL_SIZE_T nb; /* padded request size */
4665 char *m; /* memory returned by malloc call */
4666 mchunkptr p; /* corresponding chunk */
4667 char *brk; /* alignment point within p */
4668 mchunkptr newp; /* chunk to return */
4669 INTERNAL_SIZE_T newsize; /* its size */
4670 INTERNAL_SIZE_T leadsize; /* leading space before alignment point */
4671 mchunkptr remainder; /* spare room at end to split off */
4672 unsigned long remainder_size; /* its size */
4673 INTERNAL_SIZE_T size;
4674
4675
4676
4677 checked_request2size (bytes, nb);
4678
4679 /*
4680 Strategy: find a spot within that chunk that meets the alignment
4681 request, and then possibly free the leading and trailing space.
4682 */
4683
4684
4685 /* Check for overflow. */
4686 if (nb > SIZE_MAX - alignment - MINSIZE)
4687 {
4688 __set_errno (ENOMEM);
4689 return 0;
4690 }
4691
4692 /* Call malloc with worst case padding to hit alignment. */
4693
4694 m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
4695
4696 if (m == 0)
4697 return 0; /* propagate failure */
4698
4699 p = mem2chunk (m);
4700
4701 if ((((unsigned long) (m)) % alignment) != 0) /* misaligned */
4702
4703 { /*
4704 Find an aligned spot inside chunk. Since we need to give back
4705 leading space in a chunk of at least MINSIZE, if the first
4706 calculation places us at a spot with less than MINSIZE leader,
4707 we can move to the next aligned spot -- we've allocated enough
4708 total room so that this is always possible.
4709 */
4710 brk = (char *) mem2chunk (((unsigned long) (m + alignment - 1)) &
4711 - ((signed long) alignment));
4712 if ((unsigned long) (brk - (char *) (p)) < MINSIZE)
4713 brk += alignment;
4714
4715 newp = (mchunkptr) brk;
4716 leadsize = brk - (char *) (p);
4717 newsize = chunksize (p) - leadsize;
4718
4719 /* For mmapped chunks, just adjust offset */
4720 if (chunk_is_mmapped (p))
4721 {
4722 set_prev_size (newp, prev_size (p) + leadsize);
4723 set_head (newp, newsize | IS_MMAPPED);
4724 return chunk2mem (newp);
4725 }
4726
4727 /* Otherwise, give back leader, use the rest */
4728 set_head (newp, newsize | PREV_INUSE |
4729 (av != &main_arena ? NON_MAIN_ARENA : 0));
4730 set_inuse_bit_at_offset (newp, newsize);
4731 set_head_size (p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
4732 _int_free (av, p, 1);
4733 p = newp;
4734
4735 assert (newsize >= nb &&
4736 (((unsigned long) (chunk2mem (p))) % alignment) == 0);
4737 }
4738
4739 /* Also give back spare room at the end */
4740 if (!chunk_is_mmapped (p))
4741 {
4742 size = chunksize (p);
4743 if ((unsigned long) (size) > (unsigned long) (nb + MINSIZE))
4744 {
4745 remainder_size = size - nb;
4746 remainder = chunk_at_offset (p, nb);
4747 set_head (remainder, remainder_size | PREV_INUSE |
4748 (av != &main_arena ? NON_MAIN_ARENA : 0));
4749 set_head_size (p, nb);
4750 _int_free (av, remainder, 1);
4751 }
4752 }
4753
4754 check_inuse_chunk (av, p);
4755 return chunk2mem (p);
4756 }
この中で注目したいのはメモリーの割り当て時に、パディングされたバイトサイズとなる nb に対してアラインメントと MINSIZE が追加されることです。
4692 /* Call malloc with worst case padding to hit alignment. */
4693
4694 m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
要求サイズ nb に alignment と MINSIZE を余分に加算して(最悪ケースのパディングを見込んで)確保することで、確保した領域の中に必ずアラインメントされた位置を見つけられるようになるのが分かると思います。
MINSIZE については以下のように計算されます。
https://github.com/MacKomatsu/glibc/blob/release/2.27/master/malloc/malloc.c.
1190 /* The smallest possible chunk */
1191 #define MIN_CHUNK_SIZE (offsetof(struct malloc_chunk, fd_nextsize))
1192
1193 /* The smallest size we can malloc is an aligned minimal chunk */
1194
1195 #define MINSIZE \
1196 (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
MIN_CHUNK_SIZE は struct malloc_chunk 構造体のメンバー fd_nextsize までのオフセットバイトを取得したものです。
参考までに以下が malloc_chunk の定義となります。
https://github.com/MacKomatsu/glibc/blob/release/2.27/master/malloc/malloc.c.
1060 struct malloc_chunk {
1061
1062 INTERNAL_SIZE_T mchunk_prev_size; /* Size of previous chunk (if free). */
1063 INTERNAL_SIZE_T mchunk_size; /* Size in bytes, including overhead. */
1064
1065 struct malloc_chunk* fd; /* double links -- used only if free. */
1066 struct malloc_chunk* bk;
1067
1068 /* Only used for large blocks: pointer to next larger size. */
1069 struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
1070 struct malloc_chunk* bk_nextsize;
1071 };
MINSIZE マクロは分かりにくいので、簡単なアラインメントをするコードを次の項目で考えてみましょう。
Copyright 2018-2019, by Masaki Komatsu