| /* ---------------------------------------------------------------------------- |
| Copyright (c) 2020, Microsoft Research, Daan Leijen |
| This is free software; you can redistribute it and/or modify it under the |
| terms of the MIT license. A copy of the license can be found in the file |
| "LICENSE" at the root of this distribution. |
| -----------------------------------------------------------------------------*/ |
| |
| /* ---------------------------------------------------------------------------- |
| Implements a cache of segments to avoid expensive OS calls and to reuse |
| the commit_mask to optimize the commit/decommit calls. |
| The full memory map of all segments is also implemented here. |
| -----------------------------------------------------------------------------*/ |
| #include "mimalloc.h" |
| #include "mimalloc/internal.h" |
| #include "mimalloc/atomic.h" |
| |
| #include "./bitmap.h" // atomic bitmap |
| |
| //#define MI_CACHE_DISABLE 1 // define to completely disable the segment cache |
| |
| #define MI_CACHE_FIELDS (16) |
| #define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit |
| |
| #define BITS_SET() MI_ATOMIC_VAR_INIT(UINTPTR_MAX) |
| #define MI_CACHE_BITS_SET MI_INIT16(BITS_SET) // note: update if MI_CACHE_FIELDS changes |
| |
| typedef struct mi_cache_slot_s { |
| void* p; |
| size_t memid; |
| bool is_pinned; |
| mi_commit_mask_t commit_mask; |
| mi_commit_mask_t decommit_mask; |
| _Atomic(mi_msecs_t) expire; |
| } mi_cache_slot_t; |
| |
| static mi_decl_cache_align mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 |
| |
| static mi_decl_cache_align mi_bitmap_field_t cache_unavailable[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; // zero bit = available! |
| static mi_decl_cache_align mi_bitmap_field_t cache_unavailable_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; |
| static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free |
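| // Slot life-cycle: a push first claims a free bit in `cache_inuse`, fills the slot, and then |
| // clears the slot's bit in `cache_unavailable` (or `cache_unavailable_large`) to publish it; |
| // a pop claims the unavailable bit again and finally clears the `cache_inuse` bit once the |
| // slot contents have been copied out. |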
| |
| static bool mi_cdecl mi_segment_cache_is_suitable(mi_bitmap_index_t bitidx, void* arg) { |
| mi_arena_id_t req_arena_id = *((mi_arena_id_t*)arg); |
| mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; |
| return _mi_arena_memid_is_suitable(slot->memid, req_arena_id); |
| } |
| |
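| // Pop a cached segment block of exactly MI_SEGMENT_SIZE. Returns NULL if the cache is disabled, |
| // the size does not match, or no (suitable) slot is available; otherwise the slot's pointer, |
| // memid, pin state, and commit/decommit masks are returned (cached memory is never reported as zero'd). |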
| mi_decl_noinline static void* mi_segment_cache_pop_ex( |
| bool all_suitable, |
| size_t size, mi_commit_mask_t* commit_mask, |
| mi_commit_mask_t* decommit_mask, bool large_allowed, |
| bool* large, bool* is_pinned, bool* is_zero, |
| mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) |
| { |
| #ifdef MI_CACHE_DISABLE |
| return NULL; |
| #else |
| |
| // only segment blocks |
| if (size != MI_SEGMENT_SIZE) return NULL; |
| |
| // numa node determines start field |
| const int numa_node = _mi_os_numa_node(tld); |
| size_t start_field = 0; |
| if (numa_node > 0) { |
| start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; |
| if (start_field >= MI_CACHE_FIELDS) start_field = 0; |
| } |
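| // e.g. with MI_CACHE_FIELDS==16 and (say) 4 NUMA nodes, node 2 starts probing at field (16/4)*2 = 8 |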
| |
| // find an available slot and make it unavailable |
| mi_bitmap_index_t bitidx = 0; |
| bool claimed = false; |
| mi_arena_id_t req_arena_id = _req_arena_id; |
| mi_bitmap_pred_fun_t pred_fun = (all_suitable ? NULL : &mi_segment_cache_is_suitable); // even without a specific arena request we cannot pass NULL, since a cached slot may come from an exclusive arena; todo: do not put exclusive arenas in the cache? |
| |
| if (large_allowed) { // large allowed? |
| claimed = _mi_bitmap_try_find_from_claim_pred(cache_unavailable_large, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); |
| if (claimed) *large = true; |
| } |
| if (!claimed) { |
| claimed = _mi_bitmap_try_find_from_claim_pred(cache_unavailable, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); |
| if (claimed) *large = false; |
| } |
| |
| if (!claimed) return NULL; |
| |
| // no longer available but still in-use |
| mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx)); |
| mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx)); |
| mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); |
| |
| // found a slot |
| mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; |
| void* p = slot->p; |
| *memid = slot->memid; |
| *is_pinned = slot->is_pinned; |
| *is_zero = false; |
| *commit_mask = slot->commit_mask; |
| *decommit_mask = slot->decommit_mask; |
| slot->p = NULL; |
| mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); |
| |
| // mark the slot as free again |
| _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); |
| return p; |
| #endif |
| } |
| |
| |
| mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) |
| { |
| return mi_segment_cache_pop_ex(false, size, commit_mask, decommit_mask, large_allowed, large, is_pinned, is_zero, _req_arena_id, memid, tld); |
| } |
| |
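| // Decommit the committed parts of `p` (of `total` bytes) as described by `cmask`: an empty mask |
| // is a no-op, a full mask decommits in a single OS call, and a partial mask decommits each |
| // contiguous run of set bits (each bit covers total/MI_COMMIT_MASK_BITS bytes). The mask is |
| // cleared afterwards. |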
| static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) |
| { |
| if (mi_commit_mask_is_empty(cmask)) { |
| // nothing |
| } |
| else if (mi_commit_mask_is_full(cmask)) { |
| // decommit the whole in one call |
| _mi_os_decommit(p, total, stats); |
| } |
| else { |
| // decommit parts |
| mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); |
| size_t part = total/MI_COMMIT_MASK_BITS; |
| size_t idx; |
| size_t count; |
| mi_commit_mask_foreach(cmask, idx, count) { |
| void* start = (uint8_t*)p + (idx*part); |
| size_t size = count*part; |
| _mi_os_decommit(start, size, stats); |
| } |
| mi_commit_mask_foreach_end() |
| } |
| mi_commit_mask_create_empty(cmask); |
| } |
| |
| #define MI_MAX_PURGE_PER_PUSH (4) |
| |
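| // Purge expired slots: decommit their committed memory and clear their expiration. When `visit_all` |
| // is false only a bounded number of randomly chosen slots is probed (at most MI_CACHE_FIELDS visits, |
| // with the number of purge attempts bounded by MI_MAX_PURGE_PER_PUSH); `force` treats every non-zero |
| // expiration as already expired. Slots are temporarily claimed from `cache_unavailable` so a |
| // concurrent pop cannot observe them while they are being decommitted. |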
| static mi_decl_noinline void mi_segment_cache_purge(bool visit_all, bool force, mi_os_tld_t* tld) |
| { |
| MI_UNUSED(tld); |
| if (!mi_option_is_enabled(mi_option_allow_decommit)) return; |
| mi_msecs_t now = _mi_clock_now(); |
| size_t purged = 0; |
| const size_t max_visits = (visit_all ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */); |
| size_t idx = (visit_all ? 0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */ ); |
| for (size_t visited = 0; visited < max_visits; visited++,idx++) { // visit N slots |
| if (idx >= MI_CACHE_MAX) idx = 0; // wrap |
| mi_cache_slot_t* slot = &cache[idx]; |
| mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire); |
| if (expire != 0 && (force || now >= expire)) { // racy read |
| // seems expired, first claim it from available |
| purged++; |
| mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); |
| if (_mi_bitmap_claim(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx, NULL)) { // no need to check large as those cannot be decommitted anyways |
| // it was available, we claimed it (and made it unavailable) |
| mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx)); |
| mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx)); |
| // we can now access it safely |
| expire = mi_atomic_loadi64_acquire(&slot->expire); |
| if (expire != 0 && (force || now >= expire)) { // safe read |
| mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); |
| // still expired, decommit it |
| mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); |
| mi_assert_internal(!mi_commit_mask_is_empty(&slot->commit_mask)); |
| _mi_abandoned_await_readers(); // wait until safe to decommit |
| // decommit committed parts |
| // TODO: instead of decommit, we could also free to the OS? |
| mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats); |
| mi_commit_mask_create_empty(&slot->decommit_mask); |
| } |
| _mi_bitmap_unclaim(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop |
| } |
| if (!visit_all && purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push |
| } |
| } |
| } |
| |
| void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld) { |
| if (force) { |
| // called on `mi_collect(true)` but not on thread termination |
| _mi_segment_cache_free_all(tld); |
| } |
| else { |
| mi_segment_cache_purge(true /* visit all */, false /* don't force unexpired */, tld); |
| } |
| } |
| |
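| // Pop every cached slot (regardless of arena suitability) and free it back to the arena/OS, |
| // adjusting the committed statistics for the parts that were still committed. |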
| void _mi_segment_cache_free_all(mi_os_tld_t* tld) { |
| mi_commit_mask_t commit_mask; |
| mi_commit_mask_t decommit_mask; |
| bool is_pinned; |
| bool is_zero; |
| bool is_large; |
| size_t memid; |
| const size_t size = MI_SEGMENT_SIZE; |
| void* p; |
| do { |
| // keep popping and freeing the memory |
| p = mi_segment_cache_pop_ex(true /* all */, size, &commit_mask, &decommit_mask, |
| true /* allow large */, &is_large, &is_pinned, &is_zero, _mi_arena_id_none(), &memid, tld); |
| if (p != NULL) { |
| size_t csize = _mi_commit_mask_committed_size(&commit_mask, size); |
| if (csize > 0 && !is_pinned) { _mi_stat_decrease(&_mi_stats_main.committed, csize); } |
| _mi_arena_free(p, size, MI_SEGMENT_ALIGN, 0, memid, is_pinned /* pretend not committed to not double count decommits */, tld->stats); |
| } |
| } while (p != NULL); |
| } |
| |
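| // Try to cache a freed segment block. Returns false (so the caller frees it to the arena/OS instead) |
| // when the cache is disabled, the block is not a normal aligned MI_SEGMENT_SIZE block, it is a pinned |
| // arena-allocated block, or no free slot is available. Otherwise the slot is filled and, if decommit |
| // is allowed, an expiration is set (or the block is decommitted immediately when the decommit delay is 0). |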
| mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) |
| { |
| #ifdef MI_CACHE_DISABLE |
| return false; |
| #else |
| |
| // purge expired entries |
| mi_segment_cache_purge(false /* limit purges to a constant N */, false /* don't force unexpired */, tld); |
| |
| // only cache normal segment blocks |
| if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; |
| |
| // Also, do not cache arena-allocated segments that cannot be decommitted (as arena allocation is fast). |
| // This is a common case with reserved huge OS pages. |
| // |
| // (note: we could also allow segments that are already fully decommitted but that never happens |
| // as the first slice is always committed (for the segment metadata)) |
| if (!_mi_arena_is_os_allocated(memid) && is_pinned) return false; |
| |
| // numa node determines start field |
| int numa_node = _mi_os_numa_node(NULL); |
| size_t start_field = 0; |
| if (numa_node > 0) { |
| start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count()) * numa_node; |
| if (start_field >= MI_CACHE_FIELDS) start_field = 0; |
| } |
| |
| // find an available slot |
| mi_bitmap_index_t bitidx; |
| bool claimed = _mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); |
| if (!claimed) return false; |
| |
| mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx)); |
| mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx)); |
| #if MI_DEBUG>1 |
| if (is_pinned || is_large) { |
| mi_assert_internal(mi_commit_mask_is_full(commit_mask)); |
| } |
| #endif |
| |
| // set the slot |
| mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; |
| slot->p = start; |
| slot->memid = memid; |
| slot->is_pinned = is_pinned; |
| mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); |
| slot->commit_mask = *commit_mask; |
| slot->decommit_mask = *decommit_mask; |
| if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_decommit)) { |
| long delay = mi_option_get(mi_option_segment_decommit_delay); |
| if (delay == 0) { |
| _mi_abandoned_await_readers(); // wait until safe to decommit |
| mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats); |
| mi_commit_mask_create_empty(&slot->decommit_mask); |
| } |
| else { |
| mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay); |
| } |
| } |
| |
| // make it available |
| _mi_bitmap_unclaim((is_large ? cache_unavailable_large : cache_unavailable), MI_CACHE_FIELDS, 1, bitidx); |
| return true; |
| #endif |
| } |
| |
| |
| /* ----------------------------------------------------------- |
| The following functions are to reliably find the segment or |
| block that encompasses any pointer p (or NULL if it is not |
| in any of our segments). |
| We maintain a bitmap of all memory with one bit per MI_SEGMENT_SIZE (64MiB) |
| of address space; a bit is set if that range contains the segment meta data. |
| ----------------------------------------------------------- */ |
| |
| |
| #if (MI_INTPTR_SIZE==8) |
| #define MI_MAX_ADDRESS ((size_t)40 << 40) // 40TB |
| #else |
| #define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb |
| #endif |
| |
| #define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE) |
| #define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8) |
| #define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE) |
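| // e.g. on 64-bit with 64MiB segments: 40TiB/64MiB = 655360 bits, i.e. an 80KiB map of 10240 words |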
| |
| static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments |
| |
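| // Map a segment pointer to its word index and bit index in `mi_segment_map`. Segments at or above |
| // MI_MAX_ADDRESS are not tracked and map to the sentinel index MI_SEGMENT_MAP_WSIZE (with bit index 0). |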
| static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { |
| mi_assert_internal(_mi_ptr_segment(segment + 1) == segment); // is it aligned on MI_SEGMENT_SIZE? |
| if ((uintptr_t)segment >= MI_MAX_ADDRESS) { |
| *bitidx = 0; |
| return MI_SEGMENT_MAP_WSIZE; |
| } |
| else { |
| const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE; |
| *bitidx = segindex % MI_INTPTR_BITS; |
| const size_t mapindex = segindex / MI_INTPTR_BITS; |
| mi_assert_internal(mapindex < MI_SEGMENT_MAP_WSIZE); |
| return mapindex; |
| } |
| } |
| |
| void _mi_segment_map_allocated_at(const mi_segment_t* segment) { |
| size_t bitidx; |
| size_t index = mi_segment_map_index_of(segment, &bitidx); |
| mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE); |
| if (index==MI_SEGMENT_MAP_WSIZE) return; |
| uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); |
| uintptr_t newmask; |
| do { |
| newmask = (mask | ((uintptr_t)1 << bitidx)); |
| } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); |
| } |
| |
| void _mi_segment_map_freed_at(const mi_segment_t* segment) { |
| size_t bitidx; |
| size_t index = mi_segment_map_index_of(segment, &bitidx); |
| mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE); |
| if (index == MI_SEGMENT_MAP_WSIZE) return; |
| uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); |
| uintptr_t newmask; |
| do { |
| newmask = (mask & ~((uintptr_t)1 << bitidx)); |
| } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); |
| } |
| |
| // Determine the segment belonging to a pointer or NULL if it is not in a valid segment. |
| static mi_segment_t* _mi_segment_of(const void* p) { |
| if (p == NULL) return NULL; |
| mi_segment_t* segment = _mi_ptr_segment(p); |
| mi_assert_internal(segment != NULL); |
| size_t bitidx; |
| size_t index = mi_segment_map_index_of(segment, &bitidx); |
| // fast path: for any pointer into a valid small/medium/large object, or into the first MI_SEGMENT_SIZE of a huge object |
| const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); |
| if mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0) { |
| return segment; // yes, allocated by us |
| } |
| if (index==MI_SEGMENT_MAP_WSIZE) return NULL; |
| |
| // TODO: maintain the max/min allocated address range for more efficient rejection of invalid pointers? |
| |
| // search downwards for the first segment in case it is an interior pointer |
| // could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps through |
| // valid huge objects |
| // note: we could maintain a lowest index to speed up the path for invalid pointers? |
| size_t lobitidx; |
| size_t loindex; |
| uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1); |
| if (lobits != 0) { |
| loindex = index; |
| lobitidx = mi_bsr(lobits); // lobits != 0 |
| } |
| else if (index == 0) { |
| return NULL; |
| } |
| else { |
| mi_assert_internal(index > 0); |
| uintptr_t lomask = mask; |
| loindex = index; |
| do { |
| loindex--; |
| lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex]); |
| } while (lomask == 0 && loindex > 0); // stop at the first non-zero map word going down (or at word 0) |
| if (lomask == 0) return NULL; |
| lobitidx = mi_bsr(lomask); // lomask != 0 |
| } |
| mi_assert_internal(loindex < MI_SEGMENT_MAP_WSIZE); |
| // take difference as the addresses could be larger than the MAX_ADDRESS space. |
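| // (e.g. if the nearest set bit is one word down at bit 62 while bitidx is 3, the segment start |
| //  lies (1*64 + 3 - 62) = 5 segment sizes below the MI_SEGMENT_SIZE-aligned address of p) |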
| size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE; |
| segment = (mi_segment_t*)((uint8_t*)segment - diff); |
| |
| if (segment == NULL) return NULL; |
| mi_assert_internal((void*)segment < p); |
| bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie); |
| mi_assert_internal(cookie_ok); |
| if mi_unlikely(!cookie_ok) return NULL; |
| if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range |
| mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment)); |
| return segment; |
| } |
| |
| // Is this a valid pointer in our heap? |
| static bool mi_is_valid_pointer(const void* p) { |
| return (_mi_segment_of(p) != NULL); |
| } |
| |
| mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { |
| return mi_is_valid_pointer(p); |
| } |
| |
| /* |
| // Return the full segment range belonging to a pointer |
| static void* mi_segment_range_of(const void* p, size_t* size) { |
| mi_segment_t* segment = _mi_segment_of(p); |
| if (segment == NULL) { |
| if (size != NULL) *size = 0; |
| return NULL; |
| } |
| else { |
| if (size != NULL) *size = segment->segment_size; |
| return segment; |
| } |
| } |
| */ |