// blob: 2c5505feb987682b6b093f506e208fd467d16eaf [file] [log] [blame]
// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <algorithm>
#include <atomic>
#include <limits>
#include <memory>
#include <vector>
#include "base/allocator/partition_allocator/src/partition_alloc/extended_api.h"
#include "base/allocator/partition_allocator/src/partition_alloc/partition_alloc.h"
#include "base/allocator/partition_allocator/src/partition_alloc/partition_alloc_base/logging.h"
#include "base/allocator/partition_allocator/src/partition_alloc/partition_alloc_base/strings/stringprintf.h"
#include "base/allocator/partition_allocator/src/partition_alloc/partition_alloc_base/threading/platform_thread_for_testing.h"
#include "base/allocator/partition_allocator/src/partition_alloc/partition_alloc_base/time/time.h"
#include "base/allocator/partition_allocator/src/partition_alloc/partition_alloc_check.h"
#include "base/allocator/partition_allocator/src/partition_alloc/partition_alloc_constants.h"
#include "base/allocator/partition_allocator/src/partition_alloc/partition_alloc_for_testing.h"
#include "base/allocator/partition_allocator/src/partition_alloc/partition_root.h"
#include "base/allocator/partition_allocator/src/partition_alloc/thread_cache.h"
#include "base/debug/debugging_buildflags.h"
#include "base/timer/lap_timer.h"
#include "build/build_config.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "testing/perf/perf_result_reporter.h"
#if BUILDFLAG(IS_ANDROID) || defined(ARCH_CPU_32_BITS) || BUILDFLAG(IS_FUCHSIA)
// Some tests allocate many GB of memory, which can cause issues on Android and
// address-space exhaustion for any 32-bit process.
// The condition above also treats Fuchsia as constrained. When
// MEMORY_CONSTRAINED is defined, the SingleBucket, MultiBucket and
// MultiBucketWithNoisyNeighbor benchmarks in this file are compiled out.
#define MEMORY_CONSTRAINED
#endif
#if BUILDFLAG(ENABLE_ALLOCATION_STACK_TRACE_RECORDER)
#include "base/allocator/dispatcher/dispatcher.h"
#include "base/debug/allocation_trace.h"
#endif
namespace partition_alloc::internal {
namespace {
// Change kTimeLimit to something higher if you need more time to capture a
// trace.
constexpr ::base::TimeDelta kTimeLimit = ::base::Seconds(2);
// Together with kTimeLimit, these are the arguments passed to every
// ::base::LapTimer in this file, as
// (kWarmupRuns, kTimeLimit, kTimeCheckInterval).
constexpr int kWarmupRuns = 10000;
constexpr int kTimeCheckInterval = 100000;
// Request size used by the single-bucket benchmarks, and for the list head in
// MultiBucket.
constexpr size_t kAllocSize = 40;
// Size constants are mostly arbitrary, but try to simulate something like CSS
// parsing which consists of lots of relatively small objects.
constexpr int kMultiBucketMinimumSize = 24;
constexpr int kMultiBucketIncrement = 13;
// Round i (0-based) requests
// kMultiBucketMinimumSize + (i * kMultiBucketIncrement) bytes, so the final
// (largest) size is 24 + (13 * 21) = 297 bytes.
constexpr int kMultiBucketRounds = 22;
// Names used when reporting results through perf_test::PerfResultReporter.
constexpr char kMetricPrefixMemoryAllocation[] = "MemoryAllocation.";
constexpr char kMetricThroughput[] = "throughput";
constexpr char kMetricTimePerAllocation[] = "time_per_allocation";
// Builds the reporter used for |story_name|. Both metrics emitted by this file
// are registered as important: throughput in "runs/s" and time per allocation
// in "ns".
perf_test::PerfResultReporter SetUpReporter(const std::string& story_name) {
  perf_test::PerfResultReporter result(kMetricPrefixMemoryAllocation,
                                       story_name);
  result.RegisterImportantMetric(kMetricTimePerAllocation, "ns");
  result.RegisterImportantMetric(kMetricThroughput, "runs/s");
  return result;
}
// Selects which Allocator implementation CreateAllocator() instantiates for a
// benchmark run. LogResults() prints the enumerator's integer value, so the
// declaration order is visible in the logged output.
enum class AllocatorType {
// malloc()/free().
kSystem,
// A dedicated PartitionRoot created with default PartitionOptions.
kPartitionAlloc,
// A testing PartitionRoot paired with a thread-cache test scope.
kPartitionAllocWithThreadCache,
#if BUILDFLAG(ENABLE_ALLOCATION_STACK_TRACE_RECORDER)
// A PartitionRoot used while the allocation dispatcher is initialized against
// an AllocationTraceRecorder. Only exists when the build flag is set.
kPartitionAllocWithAllocationStackTraceRecorder,
#endif
};
// Minimal allocation interface the benchmarks are written against, so the same
// benchmark body can be run on each allocator under test.
class Allocator {
public:
Allocator() = default;
virtual ~Allocator() = default;
// Returns a block of at least |size| bytes. The benchmarks PA_CHECK the
// result against nullptr.
virtual void* Alloc(size_t size) = 0;
// Releases a block previously returned by Alloc() on this allocator.
virtual void Free(void* data) = 0;
};
// Allocator that forwards straight to malloc() and free().
class SystemAllocator : public Allocator {
 public:
  SystemAllocator() = default;
  ~SystemAllocator() override = default;

  // Returns whatever malloc() returns for |size|.
  void* Alloc(size_t size) override {
    void* const block = malloc(size);
    return block;
  }

  // Hands |data| back to free().
  void Free(void* data) override {
    free(data);
  }
};
// Allocator backed by its own PartitionRoot, created with default
// PartitionOptions. The root is torn down explicitly when this object dies.
class PartitionAllocator : public Allocator {
 public:
  PartitionAllocator() = default;

  ~PartitionAllocator() override {
    alloc_.DestructForTesting();
  }

  // Allocates from the owned root with hooks disabled.
  void* Alloc(size_t size) override {
    void* const block = alloc_.AllocInline<AllocFlags::kNoHooks>(size);
    return block;
  }

  // Frees through the "unknown root" entry point rather than the owned root.
  void Free(void* data) override {
    // Calling alloc_.Free<kNoHooks>() would hit the fast path, but the slower
    // path is deliberately measured because it is the more common one with
    // PA-E.
    PartitionRoot::FreeInlineInUnknownRoot<
        partition_alloc::FreeFlags::kNoHooks>(data);
  }

 private:
  PartitionRoot alloc_{PartitionOptions{}};
};
// Allocator backed by a testing PartitionRoot. The thread cache is requested
// through kOpts, except when PartitionAlloc is already used as malloc().
class PartitionAllocatorWithThreadCache : public Allocator {
 public:
  // Purges every registered thread cache, then picks the bucket distribution:
  // ResetBucketDistributionForTesting() when |use_alternate_bucket_dist| is
  // true, SwitchToDenserBucketDistribution() otherwise.
  explicit PartitionAllocatorWithThreadCache(bool use_alternate_bucket_dist)
      : scope_(allocator_.root()) {
    ThreadCacheRegistry::Instance().PurgeAll();
    auto* const root = allocator_.root();
    if (use_alternate_bucket_dist) {
      root->ResetBucketDistributionForTesting();
    } else {
      root->SwitchToDenserBucketDistribution();
    }
  }

  ~PartitionAllocatorWithThreadCache() override = default;

  // Allocates from the testing root with hooks disabled.
  void* Alloc(size_t size) override {
    void* const block =
        allocator_.root()->AllocInline<AllocFlags::kNoHooks>(size);
    return block;
  }

  // Frees through the "unknown root" entry point rather than the owned root.
  void Free(void* data) override {
    // Freeing directly on the root with kNoHooks would hit the fast path, but
    // the slower path is deliberately measured because it is the more common
    // one with PA-E.
    PartitionRoot::FreeInlineInUnknownRoot<
        partition_alloc::FreeFlags::kNoHooks>(data);
  }

 private:
  // Left empty (thread cache not requested here) when PartitionAlloc is
  // malloc().
  static constexpr partition_alloc::PartitionOptions kOpts = {
#if !BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC)
      .thread_cache = PartitionOptions::kEnabled,
#endif
  };

  // |allocator_| must stay declared before |scope_|: the constructor builds
  // |scope_| from allocator_.root().
  PartitionAllocatorForTesting<internal::DisallowLeaks> allocator_{kOpts};
  internal::ThreadCacheProcessScopeForTesting scope_;
};
#if BUILDFLAG(ENABLE_ALLOCATION_STACK_TRACE_RECORDER)
// Allocator backed by its own PartitionRoot (default PartitionOptions) that
// can optionally initialize the allocation dispatcher against an
// AllocationTraceRecorder for the lifetime of this object.
class PartitionAllocatorWithAllocationStackTraceRecorder : public Allocator {
 public:
  // When |register_hooks| is true, the dispatcher singleton is initialized with
  // this object's recorder; the destructor undoes that.
  explicit PartitionAllocatorWithAllocationStackTraceRecorder(
      bool register_hooks)
      : register_hooks_(register_hooks) {
    if (register_hooks_) {
      dispatcher_.InitializeForTesting(&recorder_);
    }
  }

  ~PartitionAllocatorWithAllocationStackTraceRecorder() override {
    // Reset the dispatcher first so it no longer refers to |recorder_|.
    if (register_hooks_) {
      dispatcher_.ResetForTesting();
    }
    // Tear down the owned root, exactly as PartitionAllocator's destructor
    // does; otherwise the root's memory outlives every benchmark run.
    alloc_.DestructForTesting();
  }

  // Unlike the other PartitionAlloc-based allocators in this file, this does
  // not pass AllocFlags::kNoHooks.
  void* Alloc(size_t size) override { return alloc_.AllocInline(size); }

  // Frees through the "unknown root" entry point rather than the owned root.
  void Free(void* data) override {
    // Even though it's easy to invoke the fast path with
    // alloc_.Free<kNoHooks>(), we chose to use the slower path, because it's
    // more common with PA-E.
    PartitionRoot::FreeInlineInUnknownRoot<
        partition_alloc::FreeFlags::kNoHooks>(data);
  }

 private:
  bool const register_hooks_;
  PartitionRoot alloc_{PartitionOptions{}};
  ::base::allocator::dispatcher::Dispatcher& dispatcher_ =
      ::base::allocator::dispatcher::Dispatcher::GetInstance();
  ::base::debug::tracer::AllocationTraceRecorder recorder_;
};
#endif // BUILDFLAG(ENABLE_ALLOCATION_STACK_TRACE_RECORDER)
// Runs one benchmark function on its own platform thread. The thread is
// started by the constructor; Run() joins it and hands back the result.
class TestLoopThread : public base::PlatformThreadForTesting::Delegate {
 public:
  // Stores the benchmark and its allocator, then starts the thread with this
  // object as delegate. Crashes if the thread cannot be created.
  TestLoopThread(float (*test_fn)(Allocator*), Allocator* allocator)
      : test_fn_(test_fn), allocator_(allocator) {
    PA_CHECK(base::PlatformThreadForTesting::Create(0, this, &thread_handle_));
  }

  // Joins the thread and returns the value produced by |test_fn_|.
  float Run() {
    base::PlatformThreadForTesting::Join(thread_handle_);
    return laps_per_second_.load();
  }

  // Thread entry point: executes the benchmark and publishes its result.
  void ThreadMain() override {
    const float measured = test_fn_(allocator_);
    laps_per_second_.store(measured);
  }

  float (*test_fn_)(Allocator*) = nullptr;
  Allocator* allocator_ = nullptr;
  base::PlatformThreadHandle thread_handle_;
  std::atomic<float> laps_per_second_;
};
void DisplayResults(const std::string& story_name,
float iterations_per_second) {
auto reporter = SetUpReporter(story_name);
reporter.AddResult(kMetricThroughput, iterations_per_second);
reporter.AddResult(kMetricTimePerAllocation,
static_cast<size_t>(1e9 / iterations_per_second));
}
// Singly linked list node laid over raw benchmark allocations so that they can
// be chained together and released later.
class MemoryAllocationPerfNode {
 public:
  MemoryAllocationPerfNode* GetNext() const {
    return next_;
  }

  void SetNext(MemoryAllocationPerfNode* p) {
    next_ = p;
  }

  // Walks the list starting at |first| and returns every node to |alloc|. The
  // successor is read before the node is freed.
  static void FreeAll(MemoryAllocationPerfNode* first, Allocator* alloc) {
    for (MemoryAllocationPerfNode* node = first; node != nullptr;) {
      MemoryAllocationPerfNode* const successor = node->GetNext();
      alloc->Free(node);
      node = successor;
    }
  }

 private:
  MemoryAllocationPerfNode* next_ = nullptr;
};
#if !defined(MEMORY_CONSTRAINED)
// Benchmark: allocates kAllocSize-byte blocks without freeing them, chaining
// them into a list. One lap is one allocation. Returns laps per second as
// measured by the LapTimer.
//
// Once more than kMaxLiveBytes are live, everything but the list head is
// freed (inside the timed region) and the chain is restarted from the head.
float SingleBucket(Allocator* allocator) {
  // With multiple threads, can get OOM otherwise.
  constexpr size_t kMaxLiveBytes = 200 * 1000 * 1000;

  auto* first =
      static_cast<MemoryAllocationPerfNode*>(allocator->Alloc(kAllocSize));
  // The head is dereferenced on the first iteration, so check it like every
  // other allocation.
  PA_CHECK(first != nullptr);
  size_t allocated_memory = kAllocSize;

  ::base::LapTimer timer(kWarmupRuns, kTimeLimit, kTimeCheckInterval);
  MemoryAllocationPerfNode* cur = first;
  do {
    auto* next =
        static_cast<MemoryAllocationPerfNode*>(allocator->Alloc(kAllocSize));
    PA_CHECK(next != nullptr);
    cur->SetNext(next);
    cur = next;
    timer.NextLap();
    allocated_memory += kAllocSize;

    if (allocated_memory > kMaxLiveBytes) {
      // Terminate the chain before walking it; see the note below.
      cur->SetNext(nullptr);
      MemoryAllocationPerfNode::FreeAll(first->GetNext(), allocator);
      cur = first;
      allocated_memory = kAllocSize;
    }
  } while (!timer.HasTimeLimitExpired());

  // next_ = nullptr only works if the class constructor is called (it's not
  // called in this case because then we can allocate arbitrary-length
  // payloads.)
  cur->SetNext(nullptr);
  MemoryAllocationPerfNode::FreeAll(first, allocator);
  return timer.LapsPerSecond();
}
#endif // !defined(MEMORY_CONSTRAINED)
// Benchmark: repeatedly allocates one kAllocSize-byte block and frees it right
// away. One lap is one allocate/free pair. Returns laps per second as measured
// by the LapTimer.
float SingleBucketWithFree(Allocator* allocator) {
  // Allocate an initial element to make sure the bucket stays set up.
  void* elem = allocator->Alloc(kAllocSize);
  // A failed allocation would silently defeat the purpose above; check it like
  // the initial allocations in MultiBucketWithFree.
  PA_CHECK(elem != nullptr);

  ::base::LapTimer timer(kWarmupRuns, kTimeLimit, kTimeCheckInterval);
  do {
    void* cur = allocator->Alloc(kAllocSize);
    PA_CHECK(cur != nullptr);
    allocator->Free(cur);
    timer.NextLap();
  } while (!timer.HasTimeLimitExpired());

  allocator->Free(elem);
  return timer.LapsPerSecond();
}
#if !defined(MEMORY_CONSTRAINED)
// Benchmark: allocates blocks of kMultiBucketRounds different sizes without
// freeing them, chaining them into a list. One lap is one full round of
// kMultiBucketRounds allocations, so the returned value is laps per second
// multiplied by kMultiBucketRounds.
//
// Once more than kMaxLiveBytes are live, everything but the list head is
// freed (inside the timed region) and the chain is restarted from the head.
float MultiBucket(Allocator* allocator) {
  // Can OOM with multiple threads.
  constexpr size_t kMaxLiveBytes = 100 * 1000 * 1000;

  auto* first =
      static_cast<MemoryAllocationPerfNode*>(allocator->Alloc(kAllocSize));
  // The head is dereferenced on the first iteration, so check it like every
  // other allocation.
  PA_CHECK(first != nullptr);
  MemoryAllocationPerfNode* cur = first;
  size_t allocated_memory = kAllocSize;

  ::base::LapTimer timer(kWarmupRuns, kTimeLimit, kTimeCheckInterval);
  do {
    for (int i = 0; i < kMultiBucketRounds; i++) {
      size_t size = kMultiBucketMinimumSize + (i * kMultiBucketIncrement);
      auto* next =
          static_cast<MemoryAllocationPerfNode*>(allocator->Alloc(size));
      PA_CHECK(next != nullptr);
      cur->SetNext(next);
      cur = next;
      allocated_memory += size;
    }

    if (allocated_memory > kMaxLiveBytes) {
      // The node constructor never runs on these blocks, so the chain has to
      // be terminated by hand before it is walked.
      cur->SetNext(nullptr);
      MemoryAllocationPerfNode::FreeAll(first->GetNext(), allocator);
      cur = first;
      allocated_memory = kAllocSize;
    }
    timer.NextLap();
  } while (!timer.HasTimeLimitExpired());

  cur->SetNext(nullptr);
  MemoryAllocationPerfNode::FreeAll(first, allocator);
  return timer.LapsPerSecond() * kMultiBucketRounds;
}
#endif // !defined(MEMORY_CONSTRAINED)
// Benchmark: for each of the kMultiBucketRounds sizes, allocates a block and
// frees it right away. One lap is one full round, so the returned value is
// laps per second multiplied by kMultiBucketRounds.
float MultiBucketWithFree(Allocator* allocator) {
  // Request size for a given 0-based round.
  const auto round_size = [](int round) -> size_t {
    return kMultiBucketMinimumSize + (round * kMultiBucketIncrement);
  };

  // Keep one live allocation per size for the whole run, so that the buckets
  // stay in use (and aren't accidentally released back to the OS).
  std::vector<void*> elems;
  elems.reserve(kMultiBucketRounds);
  for (int round = 0; round < kMultiBucketRounds; ++round) {
    void* cur = allocator->Alloc(round_size(round));
    PA_CHECK(cur != nullptr);
    elems.push_back(cur);
  }

  ::base::LapTimer timer(kWarmupRuns, kTimeLimit, kTimeCheckInterval);
  while (true) {
    for (int round = 0; round < kMultiBucketRounds; ++round) {
      void* cur = allocator->Alloc(round_size(round));
      PA_CHECK(cur != nullptr);
      allocator->Free(cur);
    }
    timer.NextLap();
    if (timer.HasTimeLimitExpired()) {
      break;
    }
  }

  std::for_each(elems.begin(), elems.end(),
                [allocator](void* ptr) { allocator->Free(ptr); });
  return timer.LapsPerSecond() * kMultiBucketRounds;
}
// Benchmark: repeatedly allocates and immediately frees a 2,000,000-byte
// block. One lap is one allocate/free pair. Returns laps per second as
// measured by the LapTimer.
// NOTE(review): presumably this size is large enough to be served by a direct
// mapping, as the function name suggests; confirm against the allocator
// constants.
float DirectMapped(Allocator* allocator) {
  constexpr size_t kSize = 2 * 1000 * 1000;
  ::base::LapTimer timer(kWarmupRuns, kTimeLimit, kTimeCheckInterval);
  while (true) {
    void* cur = allocator->Alloc(kSize);
    PA_CHECK(cur != nullptr);
    allocator->Free(cur);
    timer.NextLap();
    if (timer.HasTimeLimitExpired()) {
      break;
    }
  }
  return timer.LapsPerSecond();
}
// Instantiates the Allocator implementation matching |type|.
// |use_alternate_bucket_dist| is only forwarded to
// PartitionAllocatorWithThreadCache; the other implementations do not take it.
std::unique_ptr<Allocator> CreateAllocator(AllocatorType type,
bool use_alternate_bucket_dist) {
// No default case: every enumerator returns, so control only reaches the end
// of the function for a value outside the enum.
switch (type) {
case AllocatorType::kSystem:
return std::make_unique<SystemAllocator>();
case AllocatorType::kPartitionAlloc:
return std::make_unique<PartitionAllocator>();
case AllocatorType::kPartitionAllocWithThreadCache:
return std::make_unique<PartitionAllocatorWithThreadCache>(
use_alternate_bucket_dist);
#if BUILDFLAG(ENABLE_ALLOCATION_STACK_TRACE_RECORDER)
case AllocatorType::kPartitionAllocWithAllocationStackTraceRecorder:
// Always created with hook registration enabled.
return std::make_unique<
PartitionAllocatorWithAllocationStackTraceRecorder>(true);
#endif
}
}
// Logs one comma-separated result line prefixed with "RESULTSCSV: ". The
// fields are: thread count, allocator type as its integer enum value, total
// laps per second, minimum laps per second.
void LogResults(int thread_count,
                AllocatorType alloc_type,
                uint64_t total_laps_per_second,
                uint64_t min_laps_per_second) {
  const int alloc_type_id = static_cast<int>(alloc_type);
  PA_LOG(INFO) << "RESULTSCSV: " << thread_count << "," << alloc_type_id << ","
               << total_laps_per_second << "," << min_laps_per_second;
}
void RunTest(int thread_count,
bool use_alternate_bucket_dist,
AllocatorType alloc_type,
float (*test_fn)(Allocator*),
float (*noisy_neighbor_fn)(Allocator*),
const char* story_base_name) {
auto alloc = CreateAllocator(alloc_type, use_alternate_bucket_dist);
std::unique_ptr<TestLoopThread> noisy_neighbor_thread = nullptr;
if (noisy_neighbor_fn) {
noisy_neighbor_thread =
std::make_unique<TestLoopThread>(noisy_neighbor_fn, alloc.get());
}
std::vector<std::unique_ptr<TestLoopThread>> threads;
for (int i = 0; i < thread_count; ++i) {
threads.push_back(std::make_unique<TestLoopThread>(test_fn, alloc.get()));
}
uint64_t total_laps_per_second = 0;
uint64_t min_laps_per_second = std::numeric_limits<uint64_t>::max();
for (int i = 0; i < thread_count; ++i) {
uint64_t laps_per_second = threads[i]->Run();
min_laps_per_second = std::min(laps_per_second, min_laps_per_second);
total_laps_per_second += laps_per_second;
}
if (noisy_neighbor_thread) {
noisy_neighbor_thread->Run();
}
char const* alloc_type_str;
switch (alloc_type) {
case AllocatorType::kSystem:
alloc_type_str = "System";
break;
case AllocatorType::kPartitionAlloc:
alloc_type_str = "PartitionAlloc";
break;
case AllocatorType::kPartitionAllocWithThreadCache:
alloc_type_str = "PartitionAllocWithThreadCache";
break;
#if BUILDFLAG(ENABLE_ALLOCATION_STACK_TRACE_RECORDER)
case AllocatorType::kPartitionAllocWithAllocationStackTraceRecorder:
alloc_type_str = "PartitionAllocWithAllocationStackTraceRecorder";
break;
#endif
}
std::string name = base::TruncatingStringPrintf(
"%s%s_%s_%d", kMetricPrefixMemoryAllocation, story_base_name,
alloc_type_str, thread_count);
DisplayResults(name + "_total", total_laps_per_second);
DisplayResults(name + "_worst", min_laps_per_second);
LogResults(thread_count, alloc_type, total_laps_per_second,
min_laps_per_second);
}
// Parameterized fixture shared by the benchmarks in this file. The tuple is
// (thread count, use_alternate_bucket_dist, allocator type), matching the
// first three arguments of RunTest().
class PartitionAllocMemoryAllocationPerfTest
: public testing::TestWithParam<std::tuple<int, bool, AllocatorType>> {};
// Instantiates the suite for every combination of thread count (1 to 4),
// use_alternate_bucket_dist (false/true) and allocator type. The first macro
// argument (the instantiation name) is intentionally left empty.
// Only one partition with a thread cache: cannot use the thread cache when
// PartitionAlloc is malloc().
INSTANTIATE_TEST_SUITE_P(
,
PartitionAllocMemoryAllocationPerfTest,
::testing::Combine(
::testing::Values(1, 2, 3, 4),
::testing::Values(false, true),
::testing::Values(
AllocatorType::kSystem,
AllocatorType::kPartitionAlloc,
AllocatorType::kPartitionAllocWithThreadCache
#if BUILDFLAG(ENABLE_ALLOCATION_STACK_TRACE_RECORDER)
,
AllocatorType::kPartitionAllocWithAllocationStackTraceRecorder
#endif
)));
// This test (and the other MEMORY_CONSTRAINED-guarded ones below) allocates a
// large amount of memory, which can cause issues on Android and the other
// memory-constrained configurations.
#if !defined(MEMORY_CONSTRAINED)
// Runs the SingleBucket benchmark for the current parameter tuple, without a
// noisy neighbor.
TEST_P(PartitionAllocMemoryAllocationPerfTest, SingleBucket) {
  const auto& [thread_count, use_alternate_bucket_dist, alloc_type] =
      GetParam();
  RunTest(thread_count, use_alternate_bucket_dist, alloc_type, SingleBucket,
          /*noisy_neighbor_fn=*/nullptr, "SingleBucket");
}
#endif // !defined(MEMORY_CONSTRAINED)
// Runs the SingleBucketWithFree benchmark for the current parameter tuple,
// without a noisy neighbor.
TEST_P(PartitionAllocMemoryAllocationPerfTest, SingleBucketWithFree) {
  const auto& [thread_count, use_alternate_bucket_dist, alloc_type] =
      GetParam();
  RunTest(thread_count, use_alternate_bucket_dist, alloc_type,
          SingleBucketWithFree, /*noisy_neighbor_fn=*/nullptr,
          "SingleBucketWithFree");
}
#if !defined(MEMORY_CONSTRAINED)
// Runs the MultiBucket benchmark for the current parameter tuple, without a
// noisy neighbor.
TEST_P(PartitionAllocMemoryAllocationPerfTest, MultiBucket) {
  const auto& [thread_count, use_alternate_bucket_dist, alloc_type] =
      GetParam();
  RunTest(thread_count, use_alternate_bucket_dist, alloc_type, MultiBucket,
          /*noisy_neighbor_fn=*/nullptr, "MultiBucket");
}
#endif // !defined(MEMORY_CONSTRAINED)
// Runs the MultiBucketWithFree benchmark for the current parameter tuple,
// without a noisy neighbor.
TEST_P(PartitionAllocMemoryAllocationPerfTest, MultiBucketWithFree) {
  const auto& [thread_count, use_alternate_bucket_dist, alloc_type] =
      GetParam();
  RunTest(thread_count, use_alternate_bucket_dist, alloc_type,
          MultiBucketWithFree, /*noisy_neighbor_fn=*/nullptr,
          "MultiBucketWithFree");
}
// Runs the DirectMapped benchmark for the current parameter tuple, without a
// noisy neighbor.
TEST_P(PartitionAllocMemoryAllocationPerfTest, DirectMapped) {
  const auto& [thread_count, use_alternate_bucket_dist, alloc_type] =
      GetParam();
  RunTest(thread_count, use_alternate_bucket_dist, alloc_type, DirectMapped,
          /*noisy_neighbor_fn=*/nullptr, "DirectMapped");
}
#if !defined(MEMORY_CONSTRAINED)
// Runs the MultiBucket benchmark while one extra thread runs DirectMapped
// against the same allocator. Disabled by its DISABLED_ name prefix.
TEST_P(PartitionAllocMemoryAllocationPerfTest,
       DISABLED_MultiBucketWithNoisyNeighbor) {
  const auto& [thread_count, use_alternate_bucket_dist, alloc_type] =
      GetParam();
  RunTest(thread_count, use_alternate_bucket_dist, alloc_type, MultiBucket,
          /*noisy_neighbor_fn=*/DirectMapped, "MultiBucketWithNoisyNeighbor");
}
#endif // !defined(MEMORY_CONSTRAINED)
} // namespace
} // namespace partition_alloc::internal