blob: d13d7b82ec8ec2e29ceb54e5ef66e700935862e9 [file] [log] [blame]
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// -*- mode: C++ -*-
//
// Copyright 2022 Google LLC
//
// Licensed under the Apache License v2.0 with LLVM Exceptions (the
// "License"); you may not use this file except in compliance with the
// License. You may obtain a copy of the License at
//
// https://llvm.org/LICENSE.txt
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: Aleksei Vetrov
#include "dwarf_wrappers.h"
#include <dwarf.h>
#include <elf.h>
#include <elfutils/libdw.h>
#include <elfutils/libdwfl.h>
#include <fcntl.h>
#include <cstddef>
#include <cstdint>
#include <ios>
#include <memory>
#include <optional>
#include <ostream>
#include <string>
#include <utility>
#include <vector>
#include "error.h"
namespace stg {
namespace dwarf {
// Streams an Address: the value formatted through Hex, followed by the
// " (TLS)" marker when the address is flagged as thread-local.
std::ostream& operator<<(std::ostream& os, const Address& address) {
  const char* const tls_suffix = address.is_tls ? " (TLS)" : "";
  return os << Hex(address.value) << tls_suffix;
}
namespace {
// Callback table handed to dwfl_begin by the Handler constructors.
// No explicit `static` is needed: the enclosing anonymous namespace already
// gives the object internal linkage.
const Dwfl_Callbacks kDwflCallbacks = {
    .find_elf = nullptr,
    .find_debuginfo = dwfl_standard_find_debuginfo,
    .section_address = dwfl_offline_section_address,
    .debuginfo_path = nullptr,
};
// Return code this file compares against to detect success of libdw / libdwfl
// calls (e.g. dwarf_getlocation, dwarf_formudata, dwfl_report_end).
constexpr int kReturnOk = 0;
// Return code treated as "nothing (more) to visit" when iterating with
// dwarf_next_unit, dwarf_child and dwarf_siblingof.
constexpr int kReturnNoEntry = 1;
// Looks up `attribute` on `die` using dwarf_attr_integrate, which
// automatically resolves DW_AT_abstract_origin and DW_AT_specification
// references and fetches the attribute from the linked DIE.
//
// Returns std::nullopt when dwarf_attr_integrate yields no attribute.
//
// libdw has infinite loop protection, as it stops after 16 dereferences.
// TODO: don't use dwarf_attr_integrate by default
std::optional<Dwarf_Attribute> GetAttribute(Dwarf_Die* die,
                                            uint32_t attribute) {
  // Value-initialised storage that libdw fills in on success.
  Dwarf_Attribute found = {};
  if (dwarf_attr_integrate(die, attribute, &found) == nullptr) {
    return std::nullopt;
  }
  return found;
}
// Looks up `attribute` on `die` itself using dwarf_attr, i.e. without
// following DW_AT_specification and DW_AT_abstract_origin references.
//
// Returns std::nullopt when dwarf_attr yields no attribute.
std::optional<Dwarf_Attribute> GetDirectAttribute(Dwarf_Die* die,
                                                  uint32_t attribute) {
  // Value-initialised storage that libdw fills in on success.
  Dwarf_Attribute found = {};
  if (dwarf_attr(die, attribute, &found) == nullptr) {
    return std::nullopt;
  }
  return found;
}
void CheckOrDwflError(bool condition, const char* caller) {
if (!condition) {
int dwfl_error = dwfl_errno();
const char* errmsg = dwfl_errmsg(dwfl_error);
if (errmsg == nullptr) {
// There are some cases when DWFL fails to produce an error message.
Die() << caller << " returned error code " << Hex(dwfl_error);
}
Die() << caller << " returned error: " << errmsg;
}
}
// Extracts a non-negative integer constant from a single DWARF operation.
//
// Returns:
//  * the operand for DW_OP_addr and the unsigned DW_OP_const*u forms;
//  * the operand for the signed DW_OP_const*s forms, unless it is negative
//    when interpreted as a signed 64-bit value;
//  * the literal value (0..31) for DW_OP_lit0 .. DW_OP_lit31;
//  * std::nullopt for every other operation.
std::optional<uint64_t> MaybeGetUnsignedOperand(const Dwarf_Op& operand) {
  switch (operand.atom) {
    case DW_OP_addr:
    case DW_OP_const1u:
    case DW_OP_const2u:
    case DW_OP_const4u:
    case DW_OP_const8u:
    case DW_OP_constu:
      return operand.number;
    case DW_OP_const1s:
    case DW_OP_const2s:
    case DW_OP_const4s:
    case DW_OP_const8s:
    case DW_OP_consts:
      if (static_cast<int64_t>(operand.number) < 0) {
        // Atom is not an unsigned constant
        return std::nullopt;
      }
      return operand.number;
    default:
      break;
  }
  // DW_OP_lit0 .. DW_OP_lit31 encode the constant in the opcode itself. An
  // explicit range comparison is used instead of the GNU-only
  // `case A...B:` extension so the code stays standard C++.
  if (operand.atom >= DW_OP_lit0 && operand.atom <= DW_OP_lit31) {
    return static_cast<uint64_t>(operand.atom - DW_OP_lit0);
  }
  return std::nullopt;
}
// Pointer-plus-length description of a DWARF location expression, as filled in
// by dwarf_getlocation. The struct itself never frees `atoms`.
struct Expression {
  // Returns the operation at position `index`; no bounds checking is done.
  const Dwarf_Op& operator[](size_t index) const {
    return *(atoms + index);
  }

  // First operation of the expression (nullptr until filled in).
  Dwarf_Op* atoms = nullptr;
  // Number of operations reachable through `atoms`.
  size_t length = 0;
};
// Decodes `attribute` as a location expression via dwarf_getlocation.
//
// Returns std::nullopt for a zero-length expression. Fails through Check when
// dwarf_getlocation reports an error or hands back a non-empty expression with
// a null operation array.
std::optional<Expression> MaybeGetExpression(Dwarf_Attribute& attribute) {
  Expression expression;
  const int status =
      dwarf_getlocation(&attribute, &expression.atoms, &expression.length);
  Check(status == kReturnOk) << "dwarf_getlocation returned error";

  if (expression.length != 0) {
    Check(expression.atoms != nullptr)
        << "dwarf_getlocation returned non-empty expression with NULL atoms";
    return expression;
  }
  // If no location attribute is present or has an empty location description,
  // the variable is present in the source but not in the object code.
  // So zero length expression is equivalent of no location attribute.
  return std::nullopt;
}
} // namespace
// Opens the file at `path` for DWARF processing: starts a dwfl session,
// reports the file as an offline module and then runs InitialiseDwarf().
Handler::Handler(const std::string& path) : dwfl_(dwfl_begin(&kDwflCallbacks)) {
  CheckOrDwflError(dwfl_.get() != nullptr, "dwfl_begin");
  // Add data to process to dwfl. The path doubles as the module name, and no
  // already-open file descriptor is supplied (-1).
  const char* const file_name = path.c_str();
  dwfl_module_ = dwfl_report_offline(dwfl_.get(), file_name, file_name, -1);
  InitialiseDwarf();
}
// Opens an in-memory ELF image (`data`, `size` bytes) for DWARF processing:
// starts a dwfl session, verifies the buffer parses as ELF, reports it as an
// offline module named "<memory>" and then runs InitialiseDwarf().
Handler::Handler(char* data, size_t size) : dwfl_(dwfl_begin(&kDwflCallbacks)) {
  CheckOrDwflError(dwfl_.get() != nullptr, "dwfl_begin");
  // Check if ELF can be opened from input data, because DWFL couldn't handle
  // memory, that is not ELF.
  // TODO: remove this workaround
  {
    Elf* const probe = elf_memory(data, size);
    Check(probe != nullptr) << "Input data is not ELF";
    elf_end(probe);
  }
  // Add data to process to dwfl
  const char* const module_name = "<memory>";
  dwfl_module_ = dwfl_report_offline_memory(dwfl_.get(), module_name,
                                            module_name, data, size);
  InitialiseDwarf();
}
// Shared tail of both constructors: validates the module reported by the
// caller, finishes the dwfl reporting session and loads the module's DWARF
// data into dwarf_.
//
// Any failure is reported through CheckOrDwflError with the name of the
// libdwfl call that failed.
void Handler::InitialiseDwarf() {
  // The constructors store the result of dwfl_report_offline* in dwfl_module_
  // just before calling this function. The module pointer is what has to be
  // validated here; dwfl_ itself was already checked right after dwfl_begin.
  CheckOrDwflError(dwfl_module_ != nullptr, "dwfl_report_offline");
  // Finish adding files to dwfl and process them
  CheckOrDwflError(dwfl_report_end(dwfl_.get(), nullptr, nullptr) == kReturnOk,
                   "dwfl_report_end");
  GElf_Addr loadbase = 0;  // output argument for dwfl, unused by us
  dwarf_ = dwfl_module_getdwarf(dwfl_module_, &loadbase);
  CheckOrDwflError(dwarf_, "dwfl_module_getdwarf");
}
// Returns the Elf handle of the reported module, obtained through
// dwfl_module_getelf. Fails through CheckOrDwflError when no handle is
// returned.
Elf* Handler::GetElf() {
  GElf_Addr unused_loadbase = 0;  // output argument for dwfl, unused by us
  Elf* const module_elf = dwfl_module_getelf(dwfl_module_, &unused_loadbase);
  CheckOrDwflError(module_elf != nullptr, "dwfl_module_getelf");
  return module_elf;
}
std::vector<CompilationUnit> Handler::GetCompilationUnits() {
std::vector<CompilationUnit> result;
Dwarf_Off offset = 0;
while (true) {
Dwarf_Off next_offset;
size_t header_size = 0;
Dwarf_Half version = 0;
int return_code =
dwarf_next_unit(dwarf_, offset, &next_offset, &header_size, &version,
nullptr, nullptr, nullptr, nullptr, nullptr);
Check(return_code == kReturnOk || return_code == kReturnNoEntry)
<< "dwarf_next_unit returned error";
if (return_code == kReturnNoEntry) {
break;
}
result.push_back({version, {}});
Check(dwarf_offdie(dwarf_, offset + header_size, &result.back().entry.die))
<< "dwarf_offdie returned error";
offset = next_offset;
}
return result;
}
std::vector<Entry> Entry::GetChildren() {
Entry child;
int return_code = dwarf_child(&die, &child.die);
Check(return_code == kReturnOk || return_code == kReturnNoEntry)
<< "dwarf_child returned error";
std::vector<Entry> result;
while (return_code == kReturnOk) {
result.push_back(child);
return_code = dwarf_siblingof(&child.die, &child.die);
Check(return_code == kReturnOk || return_code == kReturnNoEntry)
<< "dwarf_siblingof returned error";
}
return result;
}
// Returns the result of dwarf_tag for this DIE.
int Entry::GetTag() {
  const int tag = dwarf_tag(&die);
  return tag;
}
// Returns the result of dwarf_dieoffset for this DIE.
Dwarf_Off Entry::GetOffset() {
  const Dwarf_Off die_offset = dwarf_dieoffset(&die);
  return die_offset;
}
std::optional<std::string> Entry::MaybeGetString(uint32_t attribute) {
std::optional<std::string> result;
auto dwarf_attribute = GetAttribute(&die, attribute);
if (!dwarf_attribute) {
return result;
}
const char* value = dwarf_formstring(&dwarf_attribute.value());
Check(value != nullptr) << "dwarf_formstring returned error";
result.emplace(value);
return result;
}
std::optional<std::string> Entry::MaybeGetDirectString(uint32_t attribute) {
std::optional<std::string> result;
auto dwarf_attribute = GetDirectAttribute(&die, attribute);
if (!dwarf_attribute) {
return result;
}
const char* value = dwarf_formstring(&dwarf_attribute.value());
Check(value != nullptr) << "dwarf_formstring returned error";
result.emplace(value);
return result;
}
// Reads `attribute` as an unsigned integer constant via dwarf_formudata.
//
// Returns std::nullopt when the attribute is absent; reports a fatal error
// through Die() when the attribute exists but dwarf_formudata fails.
std::optional<uint64_t> Entry::MaybeGetUnsignedConstant(uint32_t attribute) {
  auto found = GetAttribute(&die, attribute);
  if (!found.has_value()) {
    return std::nullopt;
  }
  uint64_t constant = 0;
  const int status = dwarf_formudata(&*found, &constant);
  if (status != kReturnOk) {
    Die() << "dwarf_formudata returned error";
  }
  return constant;
}
bool Entry::GetFlag(uint32_t attribute) {
bool result = false;
auto dwarf_attribute = (attribute == DW_AT_declaration)
? GetDirectAttribute(&die, attribute)
: GetAttribute(&die, attribute);
if (!dwarf_attribute) {
return result;
}
Check(dwarf_formflag(&dwarf_attribute.value(), &result) == kReturnOk)
<< "dwarf_formflag returned error";
return result;
}
std::optional<Entry> Entry::MaybeGetReference(uint32_t attribute) {
std::optional<Entry> result;
auto dwarf_attribute = GetAttribute(&die, attribute);
if (!dwarf_attribute) {
return result;
}
result.emplace();
Check(dwarf_formref_die(&dwarf_attribute.value(), &result->die))
<< "dwarf_formref_die returned error";
return result;
}
namespace {
// Derives an Address from a location-expression attribute.
//
// Returns std::nullopt when the expression is empty (see MaybeGetExpression).
// Otherwise the following shapes are recognised, in this order:
//  1. an expression whose first operation dwarf_getlocation_attr can turn
//     into an attribute, which is then decoded with dwarf_formaddr;
//  2. a single DW_OP_addr, whose operand is used directly;
//  3. two operations ending in a TLS operation, reported as a TLS address
//     with value 0.
// Anything else is a fatal error reported through Die().
std::optional<Address> GetAddressFromLocation(Dwarf_Attribute& attribute) {
  const std::optional<Expression> parsed = MaybeGetExpression(attribute);
  if (!parsed.has_value()) {
    return std::nullopt;
  }
  const Expression& ops = parsed.value();

  Dwarf_Attribute address_attribute;
  const int status =
      dwarf_getlocation_attr(&attribute, ops.atoms, &address_attribute);
  if (status == kReturnOk) {
    uint64_t resolved;
    Check(dwarf_formaddr(&address_attribute, &resolved) == kReturnOk)
        << "dwarf_formaddr returned error";
    return Address{.value = resolved, .is_tls = false};
  }

  switch (ops.length) {
    case 1:
      if (ops[0].atom == DW_OP_addr) {
        // DW_OP_addr is unsupported by dwarf_getlocation_attr, so we need to
        // manually extract the address from expression.
        return Address{.value = ops[0].number, .is_tls = false};
      }
      break;
    case 2: {
      // TLS operation has different encodings in Clang and GCC:
      // * Clang 14 uses DW_OP_GNU_push_tls_address
      // * GCC 12 uses DW_OP_form_tls_address
      const auto last_atom = ops[1].atom;
      if (last_atom == DW_OP_GNU_push_tls_address ||
          last_atom == DW_OP_form_tls_address) {
        // TLS symbols address may be incorrect because of unsupported
        // relocations. Resetting it to zero the same way as it is done in
        // elf::Reader::MaybeAddTypeInfo.
        // TODO: match TLS variables by address
        return Address{.value = 0, .is_tls = true};
      }
      break;
    }
    default:
      break;
  }
  Die() << "Unsupported data location expression";
}
} // namespace
std::optional<Address> Entry::MaybeGetAddress(uint32_t attribute) {
auto dwarf_attribute = GetAttribute(&die, attribute);
if (!dwarf_attribute) {
return {};
}
if (attribute == DW_AT_location) {
return GetAddressFromLocation(*dwarf_attribute);
}
Address address;
Check(dwarf_formaddr(&dwarf_attribute.value(), &address.value) == kReturnOk)
<< "dwarf_formaddr returned error";
address.is_tls = false;
return address;
}
std::optional<uint64_t> Entry::MaybeGetMemberByteOffset() {
auto attribute = GetAttribute(&die, DW_AT_data_member_location);
if (!attribute) {
return {};
}
uint64_t offset;
// Try to interpret attribute as an unsigned integer constant
if (dwarf_formudata(&attribute.value(), &offset) == kReturnOk) {
return offset;
}
// Parse location expression
const auto expression_opt = MaybeGetExpression(attribute.value());
if (!expression_opt) {
return {};
}
const Expression& expression = *expression_opt;
// Parse virtual base classes offset, which looks like this:
// [0] = DW_OP_dup
// [1] = DW_OP_deref
// [2] = constant operand
// [3] = DW_OP_minus
// [4] = DW_OP_deref
// [5] = DW_OP_plus
// This form is not in the standard, but hardcoded in compilers:
// * https://github.com/llvm/llvm-project/blob/release/17.x/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp#L1611
// * https://github.com/gcc-mirror/gcc/blob/releases/gcc-13/gcc/dwarf2out.cc#L20029
if (expression.length == 6 &&
expression[0].atom == DW_OP_dup &&
expression[1].atom == DW_OP_deref &&
expression[3].atom == DW_OP_minus &&
expression[4].atom == DW_OP_deref &&
expression[5].atom == DW_OP_plus) {
const auto byte_offset = MaybeGetUnsignedOperand(expression[2]);
if (byte_offset) {
return byte_offset;
}
}
Die() << "Unsupported member offset expression, " << Hex(GetOffset());
}
std::optional<uint64_t> Entry::MaybeGetVtableOffset() {
auto attribute = GetAttribute(&die, DW_AT_vtable_elem_location);
if (!attribute) {
return {};
}
// Parse location expression
const auto expression_opt = MaybeGetExpression(attribute.value());
if (!expression_opt) {
return {};
}
const Expression& expression = *expression_opt;
// We expect compilers to produce expression with one constant operand
if (expression.length == 1) {
const auto offset = MaybeGetUnsignedOperand(expression[0]);
if (offset) {
return offset;
}
}
Die() << "Unsupported vtable offset expression, " << Hex(GetOffset());
}
std::optional<uint64_t> Entry::MaybeGetCount() {
auto lower_bound_attribute = MaybeGetUnsignedConstant(DW_AT_lower_bound);
if (lower_bound_attribute && *lower_bound_attribute != 0) {
Die() << "Non-zero DW_AT_lower_bound is not supported";
}
auto upper_bound_attribute = GetAttribute(&die, DW_AT_upper_bound);
auto count_attribute = GetAttribute(&die, DW_AT_count);
if (!upper_bound_attribute && !count_attribute) {
return {};
}
if (upper_bound_attribute && count_attribute) {
Die() << "Both DW_AT_upper_bound and DW_AT_count given";
}
Dwarf_Attribute dwarf_attribute;
uint64_t addend;
if (upper_bound_attribute) {
dwarf_attribute = *upper_bound_attribute;
addend = 1;
} else {
dwarf_attribute = *count_attribute;
addend = 0;
}
uint64_t value;
if (dwarf_formudata(&dwarf_attribute, &value) == kReturnOk) {
return value + addend;
}
// Don't fail if attribute is not a constant and treat this as no count
// provided. This can happen if array has variable length.
// TODO: implement clean solution for separating "not a
// constant" errors from other errors.
return {};
}
// Loads the source file table of `compilation_unit` with dwarf_getsrcfiles,
// storing it in files_ / files_count_. Reports a fatal error through Die()
// when the call does not succeed.
Files::Files(Entry& compilation_unit) {
  const int status =
      dwarf_getsrcfiles(&compilation_unit.die, &files_, &files_count_);
  if (status != kReturnOk) {
    Die() << "No source file information in DWARF";
  }
}
// Resolves the file index stored in `attribute` of `entry` to a file name
// using the table loaded by the constructor.
//
// Returns std::nullopt when the attribute is absent. An uninitialised table,
// an index outside the table, or a dwarf_filesrc failure is a fatal error.
std::optional<std::string> Files::MaybeGetFile(Entry& entry,
                                               uint32_t attribute) const {
  const auto maybe_index = entry.MaybeGetUnsignedConstant(attribute);
  if (!maybe_index.has_value()) {
    return std::nullopt;
  }
  Check(files_ != nullptr) << "dwarf::Files was not initialised";

  const uint64_t index = maybe_index.value();
  if (index >= files_count_) {
    Die() << "File index is greater than or equal files count (" << index
          << " >= " << files_count_ << ")";
  }
  const char* const file_name = dwarf_filesrc(files_, index, nullptr, nullptr);
  Check(file_name != nullptr) << "dwarf_filesrc returned error";
  return std::string(file_name);
}
} // namespace dwarf
} // namespace stg