| // Copyright 2022 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "base/i18n/icu_mergeable_data_file.h" |
| |
| #include <sys/mman.h> |
| |
| #include "base/hash/hash.h" |
| #include "base/numerics/safe_conversions.h" |
| #include "base/strings/string_number_conversions.h" |
| #include "base/threading/scoped_blocking_call.h" |
| #include "build/chromeos_buildflags.h" |
| |
| namespace base::i18n { |
| |
// Enable merging of `icudtl.dat` in Lacros.
// When enabled (the default), Initialize() attempts to replace pages of
// Lacros's mapped ICU data file with mappings of Ash's byte-identical pages
// (see MergeWithAshVersion below).
BASE_FEATURE(kLacrosMergeIcuDataFile,
             "LacrosMergeIcuDataFile",
             base::FEATURE_ENABLED_BY_DEFAULT);
| |
| namespace { |
| |
#if BUILDFLAG(IS_CHROMEOS_DEVICE)
// Path of Ash's ICU data file.
constexpr char kIcuDataFileAshPath[] = "/opt/google/chrome/icudtl.dat";
#endif // BUILDFLAG(IS_CHROMEOS_DEVICE)

// Expected size of a system page.
// NOTE: assumed to be 4 KiB; this is DCHECKed against sysconf(_SC_PAGESIZE)
// in MergeWithAshVersion() before any merging happens.
constexpr int64_t kPageSize = 0x1000;

// Size of a page hash. Changing this will break compatibility
// with existing `icudtl.dat.hash` files, so be careful.
constexpr size_t kHashBytes = 8;
static_assert(sizeof(IcuMergeableDataFile::HashType) == kHashBytes);
| |
| inline IcuMergeableDataFile::HashType HashPage(const uint8_t* page) { |
| return FastHash(base::make_span(page, static_cast<size_t>(kPageSize))); |
| } |
| |
| IcuMergeableDataFile::HashType ReadHash(const uint8_t* data, size_t offset) { |
| DCHECK_EQ(0ul, offset % kHashBytes); |
| IcuMergeableDataFile::HashType hash = 0; |
| for (size_t i = 0; i < kHashBytes; i++) { |
| IcuMergeableDataFile::HashType byte = data[offset + i]; |
| hash |= byte << (i * 8); |
| } |
| return hash; |
| } |
| |
| constexpr size_t NPages(size_t length) { |
| return (length + kPageSize - 1) / kPageSize; |
| } |
| |
| } // namespace |
| |
| class AshMemoryMappedFile { |
| public: |
| bool Initialize(File ash_file) { |
| fd_ = ash_file.GetPlatformFile(); |
| return memory_mapped_file_.Initialize(std::move(ash_file)); |
| } |
| |
| PlatformFile fd() const { return fd_; } |
| const uint8_t* data() const { return memory_mapped_file_.data(); } |
| size_t length() const { return memory_mapped_file_.length(); } |
| |
| private: |
| PlatformFile fd_; |
| MemoryMappedFile memory_mapped_file_; |
| }; |
| |
| std::unique_ptr<AshMemoryMappedFile> MmapAshFile( |
| const FilePath& ash_file_path) { |
| ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK); |
| |
| // Open Ash's data file. |
| File ash_file(FilePath(ash_file_path), File::FLAG_OPEN | File::FLAG_READ); |
| |
| // Mmap Ash's data file. |
| auto ash_mapped_file = std::make_unique<AshMemoryMappedFile>(); |
| bool map_successful = ash_mapped_file->Initialize(std::move(ash_file)); |
| if (!map_successful) { |
| PLOG(DFATAL) << "Failed to mmap Ash's icudtl.dat"; |
| return nullptr; |
| } |
| |
| return ash_mapped_file; |
| } |
| |
// Class wrapping the memory-merging logic for `icudtl.dat`.
IcuMergeableDataFile::IcuMergeableDataFile() = default;

IcuMergeableDataFile::~IcuMergeableDataFile() {
  // Unmap Lacros's data file if Initialize() mapped it. `lacros_data_` is
  // null when Initialize() was never called or its mmap failed.
  if (lacros_data_) {
    // munmap(2) may block on I/O.
    ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK);
    munmap(lacros_data_, lacros_length_);
  }
}
| |
// `Hashes` bundles the per-page hashes of both files: a hash -> offset map
// for Ash (used for lookups in FindOverlap) and a flat vector for Lacros,
// indexed by page number. Move-only-style defaulted members.
IcuMergeableDataFile::Hashes::Hashes() = default;
IcuMergeableDataFile::Hashes::Hashes(HashToOffsetMap ash,
                                     std::vector<HashType> lacros)
    : ash(std::move(ash)), lacros(std::move(lacros)) {}
IcuMergeableDataFile::Hashes::Hashes(Hashes&& other) = default;
IcuMergeableDataFile::Hashes& IcuMergeableDataFile::Hashes::operator=(
    Hashes&& other) = default;
IcuMergeableDataFile::Hashes::~Hashes() = default;
| |
// Maps Lacros's `icudtl.dat` into memory and, on ChromeOS device builds with
// the feature enabled, attempts to deduplicate its pages against Ash's copy.
// Returns false if the initial mapping (or the recovery remapping after a
// failed merge) fails.
bool IcuMergeableDataFile::Initialize(File lacros_file,
                                      MemoryMappedFile::Region region) {
  // Only whole-file mappings are supported by the merging logic below.
  DCHECK(region == MemoryMappedFile::Region::kWholeFile);
  DCHECK(!lacros_file_.IsValid()) << "ICUDataFile::Initialize called twice";

  lacros_file_ = std::move(lacros_file);
  int64_t lacros_length = lacros_file_.GetLength();
  if (lacros_length < 0) {
    return false;
  }
  // Narrow to size_t, since it's used for pointer arithmetic, mmap and other
  // APIs that accept size_t.
  lacros_length_ = base::checked_cast<size_t>(lacros_length);

  // Map Lacros's version of `icudtl.dat`, then attempt merging with Ash.
  bool map_successful = MmapLacrosFile(/*remap=*/false);

#if BUILDFLAG(IS_CHROMEOS_DEVICE)
  // If we're inside an actual ChromeOS system (i.e. not just in
  // linux-lacros-rel) then we can expect Ash Chrome (and its version of
  // `icudtl.dat`) to be present in the default directory.
  // In that case, we can attempt merging.
  if (map_successful && base::FeatureList::IsEnabled(kLacrosMergeIcuDataFile)) {
    bool merge_successful = MergeWithAshVersion(FilePath(kIcuDataFileAshPath));
    // If we hit a critical failure while merging, remap Lacros's version.
    // (A failed merge may leave the mapping partially overwritten with Ash
    // pages, so a plain Lacros mapping must be restored with MAP_FIXED.)
    if (!merge_successful) {
      PLOG(DFATAL) << "Attempt to merge Lacros's icudtl.dat with Ash's failed";
      map_successful = MmapLacrosFile(/*remap=*/true);
    }
  }
#endif // BUILDFLAG(IS_CHROMEOS_DEVICE)

  return map_successful;
}
| |
| const uint8_t* IcuMergeableDataFile::data() const { |
| return static_cast<const uint8_t*>(lacros_data_); |
| } |
| |
// Scans Lacros's mapped `icudtl.dat` page by page and, for every run of
// pages byte-identical to a run in Ash's copy at `ash_file_path`, replaces
// the Lacros pages with a read-only mapping of Ash's file (MergeArea).
// Returns true on success AND on non-critical failures (e.g. Ash's file
// couldn't be mapped, in which case nothing is merged); returns false only
// on critical failures, after which the caller must remap Lacros's file.
bool IcuMergeableDataFile::MergeWithAshVersion(const FilePath& ash_file_path) {
  // Verify the assumption that page size is 4K.
  DCHECK_EQ(sysconf(_SC_PAGESIZE), kPageSize);

  // Mmap Ash's data file.
  auto ash_file = MmapAshFile(ash_file_path);
  if (!ash_file)
    return true; // Non-critical failure.

  // Calculate hashes for each page in Ash and Lacros's data files.
  Hashes hashes = CalculateHashes(*ash_file, ash_file_path);

  // Find Lacros's ICU pages that are duplicated in Ash.
  size_t lacros_offset = 0;
  while (lacros_offset < lacros_length_) {
    Slice ash_overlap = FindOverlap(*ash_file, hashes, lacros_offset);
    // If there's no overlap, move to the next page and keep scanning.
    if (ash_overlap.length == 0) {
      lacros_offset += kPageSize;
      continue;
    }

    // Found a sequence of equal pages, merge them with Ash.
    bool merge_successful = MergeArea(*ash_file, ash_overlap, lacros_offset);
    if (!merge_successful)
      return false; // Critical failure.

    // Resume scanning right after the merged run.
    lacros_offset += ash_overlap.length;
  }

  return true; // Success.
}
| |
| bool IcuMergeableDataFile::MmapLacrosFile(bool remap) { |
| ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK); |
| |
| if (remap) { |
| // If `remap` == true, we add the MAP_FIXED option to unmap the |
| // existing map and replace it with the new one in a single operation. |
| DCHECK_NE(lacros_data_, nullptr); |
| lacros_data_ = static_cast<uint8_t*>( |
| mmap(lacros_data_, lacros_length_, PROT_READ, MAP_FIXED | MAP_PRIVATE, |
| lacros_file_.GetPlatformFile(), 0)); |
| } else { |
| // Otherwise, simply map the file. |
| lacros_data_ = static_cast<uint8_t*>( |
| mmap(nullptr, lacros_length_, PROT_READ, MAP_PRIVATE, |
| lacros_file_.GetPlatformFile(), 0)); |
| } |
| |
| if (lacros_data_ == MAP_FAILED) { |
| lacros_data_ = nullptr; |
| PLOG(DFATAL) << "Failed to mmap Lacros's icudtl.dat"; |
| return false; |
| } |
| |
| return true; |
| } |
| |
| IcuMergeableDataFile::Slice IcuMergeableDataFile::FindOverlap( |
| const AshMemoryMappedFile& ash_file, |
| const Hashes& hashes, |
| size_t lacros_offset) const { |
| // Search for equal pages by hash. |
| HashType hash = hashes.lacros[lacros_offset / kPageSize]; |
| auto search = hashes.ash.find(hash); |
| if (search == hashes.ash.end()) |
| return {0, 0}; |
| |
| // Count how many pages (if any) have the same content. |
| size_t ash_offset = search->second; |
| size_t overlap_length = |
| kPageSize * CountEqualPages(ash_file, ash_file.data() + ash_offset, |
| lacros_data_ + lacros_offset); |
| |
| return {ash_offset, overlap_length}; |
| } |
| |
// Replaces `ash_overlap.length` bytes of the Lacros mapping, starting at
// `lacros_offset`, with a read-only private mapping of the same-content
// bytes of Ash's file. Returns false on failure; note the Lacros mapping may
// already be partially clobbered in that case, so callers must remap it.
bool IcuMergeableDataFile::MergeArea(const AshMemoryMappedFile& ash_file,
                                     const Slice& ash_overlap,
                                     size_t lacros_offset) {
  ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK);

  // Unmap from Lacros's file and map from Ash's file instead.
  // NOTE: "[...] If the memory region specified by addr and length overlaps
  //       pages of any existing mapping(s), then the overlapped part of the
  //       existing mapping(s) will be discarded. If the specified address
  //       cannot be used, mmap() will fail."
  // Reference: https://man7.org/linux/man-pages/man2/mmap.2.html
  uint8_t* map_result = static_cast<uint8_t*>(
      mmap(lacros_data_ + lacros_offset, ash_overlap.length, PROT_READ,
           MAP_FIXED | MAP_PRIVATE, ash_file.fd(), ash_overlap.offset));

  if (map_result == MAP_FAILED) {
    PLOG(DFATAL) << "Couldn't mmap Ash's icudtl.dat while merging";
    return false;
  }

  return true;
}
| |
| size_t IcuMergeableDataFile::CountEqualPages( |
| const AshMemoryMappedFile& ash_file, |
| const uint8_t* ash_page, |
| const uint8_t* lacros_page) const { |
| size_t pages = 0; |
| const uint8_t* ash_end = ash_file.data() + ash_file.length(); |
| const uint8_t* lacros_end = lacros_data_ + lacros_length_; |
| |
| while (ash_page < ash_end && lacros_page < lacros_end && |
| memcmp(ash_page, lacros_page, kPageSize) == 0) { |
| ash_page += kPageSize; |
| lacros_page += kPageSize; |
| pages++; |
| } |
| |
| return pages; |
| } |
| |
// Produces per-page hashes for both Ash's and Lacros's data files, loading
// them from the pre-computed `icudtl.dat.hash` files when possible and
// computing them from the mapped bytes otherwise. Records which path was
// taken in `used_cached_hashes_`.
IcuMergeableDataFile::Hashes IcuMergeableDataFile::CalculateHashes(
    const AshMemoryMappedFile& ash_file,
    const FilePath& ash_file_path) {
  // Try loading hashes from the pre-computed files first.
  Hashes hashes;
  used_cached_hashes_ = MaybeLoadCachedHashes(ash_file, ash_file_path, hashes);

  if (!used_cached_hashes_) {
    // Calculate hashes for each page in Ash's data file.
    std::vector<HashOffset> ash_hashes;
    ash_hashes.reserve(NPages(ash_file.length()));
    for (size_t offset = 0; offset < ash_file.length(); offset += kPageSize) {
      // NOTE: "POSIX specifies that the system shall always zero fill any
      // partial page at the end of the object [...]".
      // Reference: https://man7.org/linux/man-pages/man2/mmap.2.html
      //
      // Therefore this code works even if the size of Ash's `icudtl.dat` is not
      // a multiple of the page size.
      HashType hash = HashPage(ash_file.data() + offset);
      ash_hashes.emplace_back(hash, offset);
    }

    // Calculate hashes for each page in Lacros's data file.
    hashes.lacros.reserve(NPages(lacros_length_));
    for (size_t offset = 0; offset < lacros_length_; offset += kPageSize) {
      HashType hash = HashPage(lacros_data_ + offset);
      hashes.lacros.emplace_back(hash);
    }

    // Build the hash -> offset lookup map for Ash in one shot.
    hashes.ash = HashToOffsetMap(std::move(ash_hashes));
  }

  return hashes;
}
| |
// Attempts to load pre-computed page hashes from the `icudtl.dat.hash` files
// next to both Ash's and Lacros's `icudtl.dat`. Returns true and fills
// `hashes` only if BOTH hash files can be mapped and each holds exactly one
// kHashBytes entry per page of its data file; otherwise returns false and
// leaves `hashes` untouched.
bool IcuMergeableDataFile::MaybeLoadCachedHashes(
    const AshMemoryMappedFile& ash_file,
    const FilePath& ash_file_path,
    Hashes& hashes) {
  FilePath ash_hash_path =
      ash_file_path.AddExtensionASCII(kIcuDataFileHashExtension);
  FilePath lacros_hash_path =
      GetLacrosFilePath().AddExtensionASCII(kIcuDataFileHashExtension);

  // Memory map Ash's `icudtl.dat.hash`. Ensure its size is valid and consistent
  // with the current version of `icudtl.dat` (whole number of hash entries,
  // one per page).
  MemoryMappedFile ash_hash_file;
  size_t ash_pages = NPages(ash_file.length());
  bool result = ash_hash_file.Initialize(ash_hash_path);
  if (!result || (ash_hash_file.length() % kHashBytes) ||
      ((ash_hash_file.length() / kHashBytes) != ash_pages)) {
    return false;
  }

  // Same for Lacros's `icudtl.dat.hash`.
  MemoryMappedFile lacros_hash_file;
  size_t lacros_pages = NPages(lacros_length_);
  result = lacros_hash_file.Initialize(lacros_hash_path);
  if (!result || (lacros_hash_file.length() % kHashBytes) ||
      ((lacros_hash_file.length() / kHashBytes) != lacros_pages)) {
    return false;
  }

  // Load Ash's hashes. Entry i covers the page at offset i * kPageSize.
  std::vector<HashOffset> ash_hashes;
  ash_hashes.reserve(ash_pages);
  for (size_t i = 0; i < ash_hash_file.length(); i += kHashBytes) {
    HashType hash = ReadHash(ash_hash_file.data(), i);
    size_t offset = (i / kHashBytes) * kPageSize;
    ash_hashes.emplace_back(hash, offset);
  }

  // Load Lacros's hashes.
  hashes.lacros.reserve(lacros_pages);
  for (size_t i = 0; i < lacros_hash_file.length(); i += kHashBytes) {
    HashType hash = ReadHash(lacros_hash_file.data(), i);
    hashes.lacros.emplace_back(hash);
  }

  hashes.ash = HashToOffsetMap(std::move(ash_hashes));
  return true;
}
| |
| FilePath IcuMergeableDataFile::GetLacrosFilePath() { |
| // /proc/self/fd/<fd> |
| // This is a subdirectory containing one entry for each file |
| // which the process has open, named by its file descriptor, |
| // and which is a symbolic link to the actual file. |
| // Reference: proc(5) - Linux manual page. |
| char path[PATH_MAX]; |
| FilePath proc_path = |
| FilePath("/proc/self/fd/") |
| .AppendASCII(base::NumberToString(lacros_file_.GetPlatformFile())); |
| |
| // We read the content of the symbolic link to find the path of the |
| // file associated with the file descriptor. |
| int64_t path_len = readlink(proc_path.value().c_str(), path, sizeof(path)); |
| DCHECK_NE(path_len, -1); |
| DCHECK_LT(path_len, PATH_MAX); |
| |
| return FilePath(std::string(path, 0, path_len)); |
| } |
| |
| } // namespace base::i18n |