54.1. glibc-2.7 の内部実装

 では glibc-2.7 の posix_memalign の内部実装を見てみましょう。

__posix_memalign 関数(https://github.com/MacKomatsu/glibc/blob/release/2.27/master/malloc/malloc.c). 

5354 /* We need a wrapper function for one of the additions of POSIX.  */
5355 int
5356 __posix_memalign (void **memptr, size_t alignment, size_t size)
5357 {
5358   void *mem;
5359
5360   /* Test whether the SIZE argument is valid.  It must be a power of
5361      two multiple of sizeof (void *).  */
5362   if (alignment % sizeof (void *) != 0
5363       || !powerof2 (alignment / sizeof (void *))
5364       || alignment == 0)
5365     return EINVAL;
5366
5367
5368   void *address = RETURN_ADDRESS (0);
5369   mem = _mid_memalign (alignment, size, address);
5370
5371   if (mem != NULL)
5372     {
5373       *memptr = mem;
5374       return 0;
5375     }
5376
5377   return ENOMEM;
5378 }
5379 weak_alias (__posix_memalign, posix_memalign)

 __posix_memalign() がベースとなる関数ですが、この関数は _mid_memalign() 関数をコールしていますね。

5369   mem = _mid_memalign (alignment, size, address);

 同様に alined_alloc() の実装も同様に _mid_memalign() 関数をコールします。

__libc_memalign 関数(https://github.com/MacKomatsu/glibc/blob/release/2.27/master/malloc/malloc.c). 

3252 void *
3253 __libc_memalign (size_t alignment, size_t bytes)
3254 {
3255   void *address = RETURN_ADDRESS (0);
3256   return _mid_memalign (alignment, bytes, address);
3257 }
//中略
3329 /* For ISO C11.  */
3330 weak_alias (__libc_memalign, aligned_alloc)
3331 libc_hidden_def (__libc_memalign)
//中略
5580 strong_alias (__libc_memalign, __memalign)
5581 weak_alias (__libc_memalign, memalign)

 では _mid_memalign() 関数の実装です。

_mid_memalign 関数(https://github.com/MacKomatsu/glibc/blob/release/2.27/master/malloc/malloc.c). 

3259 static void *
3260 _mid_memalign (size_t alignment, size_t bytes, void *address)
3261 {
3262   mstate ar_ptr;
3263   void *p;
3264
3265   void *(*hook) (size_t, size_t, const void *) =
3266     atomic_forced_read (__memalign_hook);
3267   if (__builtin_expect (hook != NULL, 0))
3268     return (*hook)(alignment, bytes, address);
3269
3270   /* If we need less alignment than we give anyway, just relay to malloc.  */
3271   if (alignment <= MALLOC_ALIGNMENT)
3272     return __libc_malloc (bytes);
3273
3274   /* Otherwise, ensure that it is at least a minimum chunk size */
3275   if (alignment < MINSIZE)
3276     alignment = MINSIZE;
3277
3278   /* If the alignment is greater than SIZE_MAX / 2 + 1 it cannot be a
3279      power of 2 and will cause overflow in the check below.  */
3280   if (alignment > SIZE_MAX / 2 + 1)
3281     {
3282       __set_errno (EINVAL);
3283       return 0;
3284     }
3285
3286   /* Check for overflow.  */
3287   if (bytes > SIZE_MAX - alignment - MINSIZE)
3288     {
3289       __set_errno (ENOMEM);
3290       return 0;
3291     }
3292
3293
3294   /* Make sure alignment is power of 2.  */
3295   if (!powerof2 (alignment))
3296     {
3297       size_t a = MALLOC_ALIGNMENT * 2;
3298       while (a < alignment)
3299         a <<= 1;
3300       alignment = a;
3301     }
3302
3303   if (SINGLE_THREAD_P)
3304     {
3305       p = _int_memalign (&main_arena, alignment, bytes);
3306       assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
3307         &main_arena == arena_for_chunk (mem2chunk (p)));
3308
3309       return p;
3310     }
3311
3312   arena_get (ar_ptr, bytes + alignment + MINSIZE);
3313
3314   p = _int_memalign (ar_ptr, alignment, bytes);
3315   if (!p && ar_ptr != NULL)
3316     {
3317       LIBC_PROBE (memory_memalign_retry, 2, bytes, alignment);
3318       ar_ptr = arena_get_retry (ar_ptr, bytes);
3319       p = _int_memalign (ar_ptr, alignment, bytes);
3320     }
3321
3322   if (ar_ptr != NULL)
3323     __libc_lock_unlock (ar_ptr->mutex);
3324
3325   assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
3326           ar_ptr == arena_for_chunk (mem2chunk (p)));
3327   return p;
3328 }

 この中で注目したいのは MALLOC_ALIGNMENT 以下のアラインメントサイズであれば __libc_malloc() で割り当てを行うということです。

3270   /* If we need less alignment than we give anyway, just relay to malloc.  */
3271   if (alignment <= MALLOC_ALIGNMENT)
3272     return __libc_malloc (bytes);

 この _mid_memalign() 関数は __libc_malloc() 関数をコールしてます。

 さらに _mid_memalign() 関数は _int_memalign() 関数もコールしてます。

3312   arena_get (ar_ptr, bytes + alignment + MINSIZE);
3313
3314   p = _int_memalign (ar_ptr, alignment, bytes);
3315   if (!p && ar_ptr != NULL)
3316     {
3317       LIBC_PROBE (memory_memalign_retry, 2, bytes, alignment);
3318       ar_ptr = arena_get_retry (ar_ptr, bytes);
3319       p = _int_memalign (ar_ptr, alignment, bytes);
3320     }

 リクエストされたアラインメントサイズが MALLOC_ALIGNMENT 以下の数値であれば __libc_malloc() 関数が呼び出され、そうでなければ代わりに _int_memalign() 関数がコールされるってな感じです。

 てなことで __libc_malloc() と _int_memalign() 関数の中身もチェックしてみましょう。

__libc_malloc 関数(https://github.com/MacKomatsu/glibc/blob/release/2.27/master/malloc/malloc.c). 

3026 void *
3027 __libc_malloc (size_t bytes)
3028 {
3029   mstate ar_ptr;
3030   void *victim;
3031
3032   void *(*hook) (size_t, const void *)
3033     = atomic_forced_read (__malloc_hook);
3034   if (__builtin_expect (hook != NULL, 0))
3035     return (*hook)(bytes, RETURN_ADDRESS (0));
3036 #if USE_TCACHE
3037   /* int_free also calls request2size, be careful to not pad twice.  */
3038   size_t tbytes;
3039   checked_request2size (bytes, tbytes);
3040   size_t tc_idx = csize2tidx (tbytes);
3041
3042   MAYBE_INIT_TCACHE ();
3043
3044   DIAG_PUSH_NEEDS_COMMENT;
3045   if (tc_idx < mp_.tcache_bins
3046       /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
3047       && tcache
3048       && tcache->entries[tc_idx] != NULL)
3049     {
3050       return tcache_get (tc_idx);
3051     }
3052   DIAG_POP_NEEDS_COMMENT;
3053 #endif
3054
3055   if (SINGLE_THREAD_P)
3056     {
3057       victim = _int_malloc (&main_arena, bytes);
3058       assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
3059         &main_arena == arena_for_chunk (mem2chunk (victim)));
3060       return victim;
3061     }
3062
3063   arena_get (ar_ptr, bytes);
3064
3065   victim = _int_malloc (ar_ptr, bytes);
3066   /* Retry with another arena only if we were able to find a usable arena
3067      before.  */
3068   if (!victim && ar_ptr != NULL)
3069     {
3070       LIBC_PROBE (memory_malloc_retry, 1, bytes);
3071       ar_ptr = arena_get_retry (ar_ptr, bytes);
3072       victim = _int_malloc (ar_ptr, bytes);
3073     }
3074
3075   if (ar_ptr != NULL)
3076     __libc_lock_unlock (ar_ptr->mutex);
3077
3078   assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
3079           ar_ptr == arena_for_chunk (mem2chunk (victim)));
3080   return victim;
3081 }

 __libc_malloc() 関数は _int_malloc() 関数をコールしてますね。

3063   arena_get (ar_ptr, bytes);
3064
3065   victim = _int_malloc (ar_ptr, bytes);
3066   /* Retry with another arena only if we were able to find a usable arena
3067      before.  */
3068   if (!victim && ar_ptr != NULL)
3069     {
3070       LIBC_PROBE (memory_malloc_retry, 1, bytes);
3071       ar_ptr = arena_get_retry (ar_ptr, bytes);
3072       victim = _int_malloc (ar_ptr, bytes);
3073     }

 _int_malloc() 関数の宣言は以下のようになっています。

static void*  _int_malloc(mstate, size_t);

 この __libc_malloc() と _int_malloc() 関数は malloc の内部実装に該当します。

 つまりリクエストされたアラインメントサイズが MALLOC_ALIGNMENT 以下の数値であればデフォルトのアラインメントが適用されます。

 筆者の開発環境では 16 バイト境界がデフォルトになります。

 そして _int_memalign() も同様に _int_malloc() 関数をコールします。

_int_memalign 関数(https://github.com/MacKomatsu/glibc/blob/release/2.27/master/malloc/malloc.c). 

4661 static void *
4662 _int_memalign (mstate av, size_t alignment, size_t bytes)
4663 {
4664   INTERNAL_SIZE_T nb;             /* padded  request size */
4665   char *m;                        /* memory returned by malloc call */
4666   mchunkptr p;                    /* corresponding chunk */
4667   char *brk;                      /* alignment point within p */
4668   mchunkptr newp;                 /* chunk to return */
4669   INTERNAL_SIZE_T newsize;        /* its size */
4670   INTERNAL_SIZE_T leadsize;       /* leading space before alignment point */
4671   mchunkptr remainder;            /* spare room at end to split off */
4672   unsigned long remainder_size;   /* its size */
4673   INTERNAL_SIZE_T size;
4674
4675
4676
4677   checked_request2size (bytes, nb);
4678
4679   /*
4680      Strategy: find a spot within that chunk that meets the alignment
4681      request, and then possibly free the leading and trailing space.
4682    */
4683
4684
4685   /* Check for overflow.  */
4686   if (nb > SIZE_MAX - alignment - MINSIZE)
4687     {
4688       __set_errno (ENOMEM);
4689       return 0;
4690     }
4691
4692   /* Call malloc with worst case padding to hit alignment. */
4693
4694   m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
4695
4696   if (m == 0)
4697     return 0;           /* propagate failure */
4698
4699   p = mem2chunk (m);
4700
4701   if ((((unsigned long) (m)) % alignment) != 0)   /* misaligned */
4702
4703     { /*
4704                 Find an aligned spot inside chunk.  Since we need to give back
4705                 leading space in a chunk of at least MINSIZE, if the first
4706                 calculation places us at a spot with less than MINSIZE leader,
4707                 we can move to the next aligned spot -- we've allocated enough
4708                 total room so that this is always possible.
4709                  */
4710       brk = (char *) mem2chunk (((unsigned long) (m + alignment - 1)) &
4711                                 - ((signed long) alignment));
4712       if ((unsigned long) (brk - (char *) (p)) < MINSIZE)
4713         brk += alignment;
4714
4715       newp = (mchunkptr) brk;
4716       leadsize = brk - (char *) (p);
4717       newsize = chunksize (p) - leadsize;
4718
4719       /* For mmapped chunks, just adjust offset */
4720       if (chunk_is_mmapped (p))
4721         {
4722           set_prev_size (newp, prev_size (p) + leadsize);
4723           set_head (newp, newsize | IS_MMAPPED);
4724           return chunk2mem (newp);
4725         }
4726
4727       /* Otherwise, give back leader, use the rest */
4728       set_head (newp, newsize | PREV_INUSE |
4729                 (av != &main_arena ? NON_MAIN_ARENA : 0));
4730       set_inuse_bit_at_offset (newp, newsize);
4731       set_head_size (p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
4732       _int_free (av, p, 1);
4733       p = newp;
4734
4735       assert (newsize >= nb &&
4736               (((unsigned long) (chunk2mem (p))) % alignment) == 0);
4737     }
4738
4739   /* Also give back spare room at the end */
4740   if (!chunk_is_mmapped (p))
4741     {
4742       size = chunksize (p);
4743       if ((unsigned long) (size) > (unsigned long) (nb + MINSIZE))
4744         {
4745           remainder_size = size - nb;
4746           remainder = chunk_at_offset (p, nb);
4747           set_head (remainder, remainder_size | PREV_INUSE |
4748                     (av != &main_arena ? NON_MAIN_ARENA : 0));
4749           set_head_size (p, nb);
4750           _int_free (av, remainder, 1);
4751         }
4752     }
4753
4754   check_inuse_chunk (av, p);
4755   return chunk2mem (p);
4756 }

 この中で注目したいのはメモリーの割り当て時に、パディングされたバイトサイズとなる nb に対してアラインメントと MINSIZE が追加されることです。

4692   /* Call malloc with worst case padding to hit alignment. */
4693
4694   m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));

 アラインメントは MINSIZE を加算することで可能になるのが分かると思います。

 MINSIZE については以下のように計算されます。

https://github.com/MacKomatsu/glibc/blob/release/2.27/master/malloc/malloc.c

1190 /* The smallest possible chunk */
1191 #define MIN_CHUNK_SIZE        (offsetof(struct malloc_chunk, fd_nextsize))
1192
1193 /* The smallest size we can malloc is an aligned minimal chunk */
1194
1195 #define MINSIZE  \
1196   (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))

 MIN_CHUNK_SIZE は struct malloc_chunk 構造体のメンバー fd_nextsize までのオフセットバイトを取得したものです。

 参考までに以下が malloc_chunk の定義となります。

https://github.com/MacKomatsu/glibc/blob/release/2.27/master/malloc/malloc.c

1060 struct malloc_chunk {
1061
1062   INTERNAL_SIZE_T      mchunk_prev_size;  /* Size of previous chunk (if free).  */
1063   INTERNAL_SIZE_T      mchunk_size;       /* Size in bytes, including overhead. */
1064
1065   struct malloc_chunk* fd;         /* double links -- used only if free. */
1066   struct malloc_chunk* bk;
1067
1068   /* Only used for large blocks: pointer to next larger size.  */
1069   struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
1070   struct malloc_chunk* bk_nextsize;
1071 };

 MINSIZE マクロは分かりにくいので、簡単なアラインメントをするコードを次の項目で考えて見ましょう。

Copyright 2018-2019, by Masaki Komatsu