blob: 39ddcac0a87bb80ec036b76cec87afb7bf227496 [file] [log] [blame]
// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/i18n/icu_mergeable_data_file.h"
#include <sys/mman.h>
#include "base/hash/hash.h"
#include "base/numerics/safe_conversions.h"
#include "base/strings/string_number_conversions.h"
#include "base/threading/scoped_blocking_call.h"
#include "build/chromeos_buildflags.h"
namespace base::i18n {
// Feature flag controlling whether Lacros's `icudtl.dat` mapping is merged
// with Ash's copy. It is declared enabled by default and is consulted by
// `IcuMergeableDataFile::Initialize()` (on ChromeOS device builds only) before
// a merge is attempted.
BASE_FEATURE(kLacrosMergeIcuDataFile,
"LacrosMergeIcuDataFile",
base::FEATURE_ENABLED_BY_DEFAULT);
namespace {
#if BUILDFLAG(IS_CHROMEOS_DEVICE)
// Path of Ash's ICU data file. Only defined for ChromeOS device builds, which
// are the only builds where `Initialize()` attempts a merge.
constexpr char kIcuDataFileAshPath[] = "/opt/google/chrome/icudtl.dat";
#endif // BUILDFLAG(IS_CHROMEOS_DEVICE)
// Expected size of a system page (4 KiB). Hashing, comparison and remapping
// all operate on units of this size; `MergeWithAshVersion()` DCHECKs it
// against `sysconf(_SC_PAGESIZE)`.
constexpr int64_t kPageSize = 0x1000;
// Size of a page hash. Changing this will break compatibility
// with existing `icudtl.dat.hash` files, so be careful.
constexpr size_t kHashBytes = 8;
// The in-memory hash type must be exactly as wide as one on-disk hash entry,
// since `ReadHash()` assembles a `HashType` from `kHashBytes` bytes.
static_assert(sizeof(IcuMergeableDataFile::HashType) == kHashBytes);
// Returns the hash of the `kPageSize` bytes that start at `page`. The caller
// must guarantee that a full page is readable from that address.
inline IcuMergeableDataFile::HashType HashPage(const uint8_t* page) {
  const auto page_bytes =
      base::make_span(page, static_cast<size_t>(kPageSize));
  return FastHash(page_bytes);
}
// Decodes one hash entry stored at byte `offset` inside `data`. The entry is
// `kHashBytes` long and stored least-significant byte first. `offset` is
// expected to be a multiple of `kHashBytes`.
IcuMergeableDataFile::HashType ReadHash(const uint8_t* data, size_t offset) {
  DCHECK_EQ(0ul, offset % kHashBytes);
  const uint8_t* const entry = data + offset;
  IcuMergeableDataFile::HashType value = 0;
  // Fold from the most significant byte down, so that `entry[0]` ends up in
  // the lowest 8 bits of the result.
  for (size_t remaining = kHashBytes; remaining-- > 0;) {
    value = (value << 8) | entry[remaining];
  }
  return value;
}
// Number of `kPageSize`-sized pages needed to hold `length` bytes, counting a
// trailing partial page as a whole one.
constexpr size_t NPages(size_t length) {
  const size_t page_bytes = static_cast<size_t>(kPageSize);
  return (length + (page_bytes - 1)) / page_bytes;
}
} // namespace
// Read-only memory mapping of Ash's `icudtl.dat` that additionally remembers
// the platform file handle it was created from. The handle is what
// `IcuMergeableDataFile::MergeArea()` passes to `mmap()` when it replaces
// Lacros pages with Ash's.
class AshMemoryMappedFile {
 public:
  // Records the platform handle of `ash_file`, then transfers the file to the
  // wrapped `MemoryMappedFile`. Returns whether the mapping was established.
  bool Initialize(File ash_file) {
    // The handle has to be read before the file is moved away.
    fd_ = ash_file.GetPlatformFile();
    const bool mapped = memory_mapped_file_.Initialize(std::move(ash_file));
    return mapped;
  }

  // Start address and size in bytes of the mapped file contents.
  const uint8_t* data() const { return memory_mapped_file_.data(); }
  size_t length() const { return memory_mapped_file_.length(); }

  // Platform file handle captured by `Initialize()`.
  PlatformFile fd() const { return fd_; }

 private:
  PlatformFile fd_;
  MemoryMappedFile memory_mapped_file_;
};
std::unique_ptr<AshMemoryMappedFile> MmapAshFile(
const FilePath& ash_file_path) {
ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK);
// Open Ash's data file.
File ash_file(FilePath(ash_file_path), File::FLAG_OPEN | File::FLAG_READ);
// Mmap Ash's data file.
auto ash_mapped_file = std::make_unique<AshMemoryMappedFile>();
bool map_successful = ash_mapped_file->Initialize(std::move(ash_file));
if (!map_successful) {
PLOG(DFATAL) << "Failed to mmap Ash's icudtl.dat";
return nullptr;
}
return ash_mapped_file;
}
// Class wrapping the memory-merging logic for `icudtl.dat`.
//
// The constructor is defaulted and does no work of its own; the data file is
// opened and mapped by `Initialize()`.
IcuMergeableDataFile::IcuMergeableDataFile() = default;
// Releases the mapping of the ICU data, if one exists. A single `munmap()`
// over the whole range is issued regardless of which file currently backs
// each page.
IcuMergeableDataFile::~IcuMergeableDataFile() {
  if (!lacros_data_) {
    return;  // Nothing was ever mapped (or mapping failed).
  }
  ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK);
  munmap(lacros_data_, lacros_length_);
}
// Creates an empty set of hashes.
IcuMergeableDataFile::Hashes::Hashes() = default;
// Takes ownership of both containers: `ash` is the hash -> offset lookup for
// Ash's pages, `lacros` is the list of page hashes for Lacros's file (indexed
// by page number in `FindOverlap()`). The parameters intentionally share the
// members' names; inside the initializers the parameter is the one moved from.
IcuMergeableDataFile::Hashes::Hashes(HashToOffsetMap ash,
std::vector<HashType> lacros)
: ash(std::move(ash)), lacros(std::move(lacros)) {}
// Defaulted move operations; no copy operations are defined in this file.
IcuMergeableDataFile::Hashes::Hashes(Hashes&& other) = default;
IcuMergeableDataFile::Hashes& IcuMergeableDataFile::Hashes::operator=(
Hashes&& other) = default;
IcuMergeableDataFile::Hashes::~Hashes() = default;
// Takes ownership of `lacros_file`, maps it read-only and, on ChromeOS device
// builds with the feature enabled, tries to replace pages that are identical
// in Ash's `icudtl.dat` with mappings of Ash's file.
//
// `region` must be `MemoryMappedFile::Region::kWholeFile`.
// Returns true if the data ends up mapped (merged or not), false otherwise.
bool IcuMergeableDataFile::Initialize(File lacros_file,
                                      MemoryMappedFile::Region region) {
  DCHECK(region == MemoryMappedFile::Region::kWholeFile);
  DCHECK(!lacros_file_.IsValid()) << "ICUDataFile::Initialize called twice";

  lacros_file_ = std::move(lacros_file);
  const int64_t file_size = lacros_file_.GetLength();
  if (file_size < 0) {
    return false;
  }
  // The length is kept as size_t because it feeds pointer arithmetic, mmap
  // and other size_t-based APIs.
  lacros_length_ = base::checked_cast<size_t>(file_size);

  // First map Lacros's own copy of `icudtl.dat`.
  if (!MmapLacrosFile(/*remap=*/false)) {
    return false;
  }

#if BUILDFLAG(IS_CHROMEOS_DEVICE)
  // On a real ChromeOS system (as opposed to e.g. linux-lacros-rel) Ash
  // Chrome and its `icudtl.dat` are expected to live in the default
  // directory, so merging can be attempted.
  if (!base::FeatureList::IsEnabled(kLacrosMergeIcuDataFile)) {
    return true;
  }
  if (MergeWithAshVersion(FilePath(kIcuDataFileAshPath))) {
    return true;
  }
  // A critical failure happened mid-merge: go back to a plain mapping of
  // Lacros's file.
  PLOG(DFATAL) << "Attempt to merge Lacros's icudtl.dat with Ash's failed";
  return MmapLacrosFile(/*remap=*/true);
#else
  return true;
#endif  // BUILDFLAG(IS_CHROMEOS_DEVICE)
}
// Returns the start of the mapped ICU data as a read-only byte pointer.
// `MmapLacrosFile()` resets `lacros_data_` to nullptr when mapping fails, so
// the result is null in that case.
const uint8_t* IcuMergeableDataFile::data() const {
return static_cast<const uint8_t*>(lacros_data_);
}
bool IcuMergeableDataFile::MergeWithAshVersion(const FilePath& ash_file_path) {
// Verify the assumption that page size is 4K.
DCHECK_EQ(sysconf(_SC_PAGESIZE), kPageSize);
// Mmap Ash's data file.
auto ash_file = MmapAshFile(ash_file_path);
if (!ash_file)
return true; // Non-critical failure.
// Calculate hashes for each page in Ash and Lacros's data files.
Hashes hashes = CalculateHashes(*ash_file, ash_file_path);
// Find Lacros's ICU pages that are duplicated in Ash.
size_t lacros_offset = 0;
while (lacros_offset < lacros_length_) {
Slice ash_overlap = FindOverlap(*ash_file, hashes, lacros_offset);
// If there's no overlap, move to the next page and keep scanning.
if (ash_overlap.length == 0) {
lacros_offset += kPageSize;
continue;
}
// Found a sequence of equal pages, merge them with Ash.
bool merge_successful = MergeArea(*ash_file, ash_overlap, lacros_offset);
if (!merge_successful)
return false; // Critical failure.
lacros_offset += ash_overlap.length;
}
return true; // Success.
}
// Maps the whole of Lacros's data file read-only and privately.
//
// With `remap` == false the kernel picks the address. With `remap` == true the
// file is mapped with MAP_FIXED on top of the existing mapping, so the old
// mapping is dropped and replaced in one operation.
// On failure `lacros_data_` is reset to nullptr and false is returned.
bool IcuMergeableDataFile::MmapLacrosFile(bool remap) {
  ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK);

  void* address_hint = nullptr;
  int map_flags = MAP_PRIVATE;
  if (remap) {
    // Replacing a mapping only makes sense if there is one already.
    DCHECK_NE(lacros_data_, nullptr);
    address_hint = lacros_data_;
    map_flags = MAP_FIXED | MAP_PRIVATE;
  }

  void* const mapping = mmap(address_hint, lacros_length_, PROT_READ,
                             map_flags, lacros_file_.GetPlatformFile(), 0);
  if (mapping == MAP_FAILED) {
    lacros_data_ = nullptr;
    PLOG(DFATAL) << "Failed to mmap Lacros's icudtl.dat";
    return false;
  }

  lacros_data_ = static_cast<uint8_t*>(mapping);
  return true;
}
// Looks for a run of pages in `ash_file` identical to the pages of Lacros's
// mapping starting at byte `lacros_offset`.
//
// Returns {offset in Ash's file, length in bytes} of the run. The length is 0
// when the page's hash is unknown to Ash, or when the hash matched but the
// page contents turned out to differ.
IcuMergeableDataFile::Slice IcuMergeableDataFile::FindOverlap(
    const AshMemoryMappedFile& ash_file,
    const Hashes& hashes,
    size_t lacros_offset) const {
  // Use the hash of the Lacros page to find a candidate page in Ash.
  const size_t page_index = lacros_offset / kPageSize;
  const auto candidate = hashes.ash.find(hashes.lacros[page_index]);
  if (candidate == hashes.ash.end()) {
    return {0, 0};
  }

  // Confirm the match byte-by-byte and extend it as far as it goes.
  const size_t ash_offset = candidate->second;
  const size_t equal_pages = CountEqualPages(
      ash_file, ash_file.data() + ash_offset, lacros_data_ + lacros_offset);
  const size_t overlap_bytes = kPageSize * equal_pages;
  return {ash_offset, overlap_bytes};
}
// Replaces `ash_overlap.length` bytes of the mapping, starting `lacros_offset`
// bytes into it, with a read-only private mapping of Ash's file taken at
// `ash_overlap.offset`. Returns false (after logging) if `mmap()` fails.
bool IcuMergeableDataFile::MergeArea(const AshMemoryMappedFile& ash_file,
                                     const Slice& ash_overlap,
                                     size_t lacros_offset) {
  ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK);

  // MAP_FIXED makes this an in-place swap of the backing file.
  // NOTE: "[...] If the memory region specified by addr and length overlaps
  //        pages of any existing mapping(s), then the overlapped part of the
  //        existing mapping(s) will be discarded. If the specified address
  //        cannot be used, mmap() will fail."
  // Reference: https://man7.org/linux/man-pages/man2/mmap.2.html
  void* const target = lacros_data_ + lacros_offset;
  void* const mapped =
      mmap(target, ash_overlap.length, PROT_READ, MAP_FIXED | MAP_PRIVATE,
           ash_file.fd(), ash_overlap.offset);
  if (mapped != MAP_FAILED) {
    return true;
  }

  PLOG(DFATAL) << "Couldn't mmap Ash's icudtl.dat while merging";
  return false;
}
size_t IcuMergeableDataFile::CountEqualPages(
const AshMemoryMappedFile& ash_file,
const uint8_t* ash_page,
const uint8_t* lacros_page) const {
size_t pages = 0;
const uint8_t* ash_end = ash_file.data() + ash_file.length();
const uint8_t* lacros_end = lacros_data_ + lacros_length_;
while (ash_page < ash_end && lacros_page < lacros_end &&
memcmp(ash_page, lacros_page, kPageSize) == 0) {
ash_page += kPageSize;
lacros_page += kPageSize;
pages++;
}
return pages;
}
// Produces the per-page hashes for both Ash's and Lacros's data files.
// Pre-computed hash files are preferred; if they cannot be used, every page of
// both mappings is hashed here. `used_cached_hashes_` records which path was
// taken.
IcuMergeableDataFile::Hashes IcuMergeableDataFile::CalculateHashes(
    const AshMemoryMappedFile& ash_file,
    const FilePath& ash_file_path) {
  Hashes hashes;

  // Fast path: hashes shipped next to the data files.
  used_cached_hashes_ = MaybeLoadCachedHashes(ash_file, ash_file_path, hashes);
  if (used_cached_hashes_) {
    return hashes;
  }

  // Slow path, Ash side: hash each page and remember where it lives.
  //
  // NOTE: "POSIX specifies that the system shall always zero fill any
  //        partial page at the end of the object [...]".
  // Reference: https://man7.org/linux/man-pages/man2/mmap.2.html
  //
  // So hashing a full page is fine even when the size of Ash's `icudtl.dat`
  // is not a multiple of the page size.
  const size_t ash_length = ash_file.length();
  std::vector<HashOffset> ash_entries;
  ash_entries.reserve(NPages(ash_length));
  for (size_t page_start = 0; page_start < ash_length;
       page_start += kPageSize) {
    ash_entries.emplace_back(HashPage(ash_file.data() + page_start),
                             page_start);
  }
  hashes.ash = HashToOffsetMap(std::move(ash_entries));

  // Slow path, Lacros side: one hash per page, in page order.
  hashes.lacros.reserve(NPages(lacros_length_));
  for (size_t page_start = 0; page_start < lacros_length_;
       page_start += kPageSize) {
    hashes.lacros.emplace_back(HashPage(lacros_data_ + page_start));
  }

  return hashes;
}
bool IcuMergeableDataFile::MaybeLoadCachedHashes(
const AshMemoryMappedFile& ash_file,
const FilePath& ash_file_path,
Hashes& hashes) {
FilePath ash_hash_path =
ash_file_path.AddExtensionASCII(kIcuDataFileHashExtension);
FilePath lacros_hash_path =
GetLacrosFilePath().AddExtensionASCII(kIcuDataFileHashExtension);
// Memory map Ash's `icudtl.dat.hash`. Ensure its size is valid and consistent
// with the current version of `icudtl.dat`.
MemoryMappedFile ash_hash_file;
size_t ash_pages = NPages(ash_file.length());
bool result = ash_hash_file.Initialize(ash_hash_path);
if (!result || (ash_hash_file.length() % kHashBytes) ||
((ash_hash_file.length() / kHashBytes) != ash_pages)) {
return false;
}
// Same for Lacros's `icudtl.dat.hash`.
MemoryMappedFile lacros_hash_file;
size_t lacros_pages = NPages(lacros_length_);
result = lacros_hash_file.Initialize(lacros_hash_path);
if (!result || (lacros_hash_file.length() % kHashBytes) ||
((lacros_hash_file.length() / kHashBytes) != lacros_pages)) {
return false;
}
// Load Ash's hashes.
std::vector<HashOffset> ash_hashes;
ash_hashes.reserve(ash_pages);
for (size_t i = 0; i < ash_hash_file.length(); i += kHashBytes) {
HashType hash = ReadHash(ash_hash_file.data(), i);
size_t offset = (i / kHashBytes) * kPageSize;
ash_hashes.emplace_back(hash, offset);
}
// Load Lacros's hashes.
hashes.lacros.reserve(lacros_pages);
for (size_t i = 0; i < lacros_hash_file.length(); i += kHashBytes) {
HashType hash = ReadHash(lacros_hash_file.data(), i);
hashes.lacros.emplace_back(hash);
}
hashes.ash = HashToOffsetMap(std::move(ash_hashes));
return true;
}
// Returns the filesystem path of the file behind `lacros_file_`, resolved
// through its `/proc/self/fd/<fd>` symbolic link.
//
// Returns an empty FilePath (after logging) if the link cannot be read or if
// the target does not fit in a PATH_MAX-sized buffer.
FilePath IcuMergeableDataFile::GetLacrosFilePath() {
  // /proc/self/fd/<fd>
  //   This is a subdirectory containing one entry for each file
  //   which the process has open, named by its file descriptor,
  //   and which is a symbolic link to the actual file.
  // Reference: proc(5) - Linux manual page.
  const FilePath proc_path =
      FilePath("/proc/self/fd/")
          .AppendASCII(base::NumberToString(lacros_file_.GetPlatformFile()));

  // We read the content of the symbolic link to find the path of the
  // file associated with the file descriptor.
  char path[PATH_MAX];
  const ssize_t path_len =
      readlink(proc_path.value().c_str(), path, sizeof(path));
  if (path_len < 0) {
    PLOG(DFATAL) << "Failed to resolve the path of Lacros's icudtl.dat";
    return FilePath();
  }
  // readlink() silently truncates to the buffer size, so a completely filled
  // buffer cannot be trusted to hold the whole path.
  if (static_cast<size_t>(path_len) >= sizeof(path)) {
    LOG(DFATAL) << "Path of Lacros's icudtl.dat is too long";
    return FilePath();
  }

  // readlink() does NOT append a terminating NUL, so the buffer must never be
  // treated as a C string: build the string from exactly `path_len` bytes.
  return FilePath(std::string(path, static_cast<size_t>(path_len)));
}
} // namespace base::i18n