blob: 2682c4909d9a56ae9ab5030b74a224d6b134cfd5 [file] [log] [blame]
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// -*- mode: C++ -*-
//
// Copyright 2021-2023 Google LLC
//
// Licensed under the Apache License v2.0 with LLVM Exceptions (the
// "License"); you may not use this file except in compliance with the
// License. You may obtain a copy of the License at
//
// https://llvm.org/LICENSE.txt
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: Giuliano Procida
// Author: Ignes Simeonova
#include "abigail_reader.h"
#include <fcntl.h>
#include <unistd.h>
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <iomanip>
#include <ios>
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <sstream>
#include <string>
#include <string_view>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <vector>
#include <libxml/parser.h>
#include <libxml/tree.h>
#include "error.h"
#include "file_descriptor.h"
#include "graph.h"
#include "metrics.h"
#include "scope.h"
#include "type_normalisation.h"
namespace stg {
namespace abixml {
namespace {
// Cast a libxml string to C string and present it as a string_view.
std::string_view FromLibxml(const xmlChar* str) {
return reinterpret_cast<const char*>(str);
}
// Cast a C string to a libxml string.
const xmlChar* ToLibxml(const char* str) {
return reinterpret_cast<const xmlChar*>(str);
}
// Get the name of an XML element.
std::string_view GetName(xmlNodePtr element) {
return FromLibxml(element->name);
}
void CheckName(const char* name, xmlNodePtr element) {
const auto element_name = GetName(element);
if (element_name != name) {
Die() << "expected element '" << name
<< "' but got '" << element_name << "'";
}
}
xmlNodePtr Child(xmlNodePtr node) {
return node->children;
}
xmlNodePtr Next(xmlNodePtr node) {
return node->next;
}
xmlNodePtr GetOnlyChild(xmlNodePtr element) {
xmlNodePtr child = Child(element);
if (child == nullptr || Next(child) != nullptr) {
Die() << "element '" << GetName(element) << "' without exactly one child";
}
return child;
}
// Get an optional attribute.
std::optional<std::string> GetAttribute(xmlNodePtr node, const char* name) {
std::optional<std::string> result;
xmlChar* attribute = xmlGetProp(node, ToLibxml(name));
if (attribute) {
result.emplace(FromLibxml(attribute));
xmlFree(attribute);
}
return result;
}
// Get an attribute.
std::string GetAttributeOrDie(xmlNodePtr node, const char* name) {
xmlChar* attribute = xmlGetProp(node, ToLibxml(name));
if (!attribute) {
Die() << "element '" << GetName(node)
<< "' missing attribute '" << name << "'";
}
const std::string result(FromLibxml(attribute));
xmlFree(attribute);
return result;
}
// Set an attribute value.
void SetAttribute(xmlNodePtr node, const char* name, const std::string &value) {
xmlSetProp(node, ToLibxml(name), ToLibxml(value.c_str()));
}
// Unset an attribute value.
void UnsetAttribute(xmlNodePtr node, const char* name) {
xmlUnsetProp(node, ToLibxml(name));
}
// Remove a node and free its storage.
void RemoveNode(xmlNodePtr node) {
xmlUnlinkNode(node);
xmlFreeNode(node);
}
// Move a node to be the last child of another.
void MoveNode(xmlNodePtr node, xmlNodePtr destination) {
xmlUnlinkNode(node);
xmlAddChild(destination, node);
}
template <typename T>
std::optional<T> Parse(const std::string& value) {
T result;
std::istringstream is(value);
is >> std::noskipws >> result;
if (is && is.eof()) {
return {result};
}
return {};
}
template <>
std::optional<bool> Parse<bool>(const std::string& value) {
if (value == "yes") {
return {true};
} else if (value == "no") {
return {false};
}
return {};
}
template <>
std::optional<ElfSymbol::SymbolType> Parse<ElfSymbol::SymbolType>(
const std::string& value) {
if (value == "object-type") {
return {ElfSymbol::SymbolType::OBJECT};
} else if (value == "func-type") {
return {ElfSymbol::SymbolType::FUNCTION};
} else if (value == "common-type") {
return {ElfSymbol::SymbolType::COMMON};
} else if (value == "tls-type") {
return {ElfSymbol::SymbolType::TLS};
} else if (value == "gnu-ifunc-type") {
return {ElfSymbol::SymbolType::GNU_IFUNC};
}
return {};
}
template <>
std::optional<ElfSymbol::Binding> Parse<ElfSymbol::Binding>(
const std::string& value) {
if (value == "global-binding") {
return {ElfSymbol::Binding::GLOBAL};
} else if (value == "local-binding") {
return {ElfSymbol::Binding::LOCAL};
} else if (value == "weak-binding") {
return {ElfSymbol::Binding::WEAK};
} else if (value == "gnu-unique-binding") {
return {ElfSymbol::Binding::GNU_UNIQUE};
}
return {};
}
template <>
std::optional<ElfSymbol::Visibility> Parse<ElfSymbol::Visibility>(
const std::string& value) {
if (value == "default-visibility") {
return {ElfSymbol::Visibility::DEFAULT};
} else if (value == "protected-visibility") {
return {ElfSymbol::Visibility::PROTECTED};
} else if (value == "hidden-visibility") {
return {ElfSymbol::Visibility::HIDDEN};
} else if (value == "internal-visibility") {
return {ElfSymbol::Visibility::INTERNAL};
}
return {};
}
template <>
std::optional<ElfSymbol::CRC> Parse<ElfSymbol::CRC>(const std::string& value) {
uint32_t number;
std::istringstream is(value);
is >> std::noskipws >> std::hex >> number;
if (is && is.eof()) {
return std::make_optional<ElfSymbol::CRC>(number);
}
return std::nullopt;
}
template <typename T>
T GetParsedValueOrDie(xmlNodePtr element, const char* name,
const std::string& value, const std::optional<T>& parse) {
if (parse) {
return *parse;
}
Die() << "element '" << GetName(element)
<< "' has attribute '" << name
<< "' with bad value '" << value << "'";
}
template <typename T>
T ReadAttributeOrDie(xmlNodePtr element, const char* name) {
const auto value = GetAttributeOrDie(element, name);
return GetParsedValueOrDie(element, name, value, Parse<T>(value));
}
template <typename T>
std::optional<T> ReadAttribute(xmlNodePtr element, const char* name) {
const auto value = GetAttribute(element, name);
if (value) {
return {GetParsedValueOrDie(element, name, *value, Parse<T>(*value))};
}
return {};
}
template <typename T>
T ReadAttribute(xmlNodePtr element, const char* name, const T& default_value) {
const auto value = GetAttribute(element, name);
if (value) {
return GetParsedValueOrDie(element, name, *value, Parse<T>(*value));
}
return default_value;
}
template <typename T>
T ReadAttribute(xmlNodePtr element, const char* name,
std::function<std::optional<T>(const std::string&)> parse) {
const auto value = GetAttributeOrDie(element, name);
return GetParsedValueOrDie(element, name, value, parse(value));
}
// Remove non-element nodes, recursively.
//
// This simplifies subsequent manipulation. This should only remove comment,
// text and possibly CDATA nodes.
void StripNonElements(xmlNodePtr node) {
switch (node->type) {
case XML_COMMENT_NODE:
case XML_TEXT_NODE:
case XML_CDATA_SECTION_NODE:
RemoveNode(node);
break;
case XML_ELEMENT_NODE: {
xmlNodePtr child = Child(node);
while (child) {
xmlNodePtr next = Next(child);
StripNonElements(child);
child = next;
}
break;
}
default:
Die() << "unexpected XML node type: " << node->type;
}
}
// Determine whether one XML element is a subtree of another, and optionally,
// actually equal to it.
bool SubOrEqualTree(bool also_equal, xmlNodePtr left, xmlNodePtr right) {
// Node names must match.
const auto left_name = GetName(left);
const auto right_name = GetName(right);
if (left_name != right_name) {
return false;
}
// Attributes may be missing on the left, but must match otherwise.
size_t left_attributes = 0;
for (auto* p = left->properties; p; p = p->next) {
++left_attributes;
const auto attribute = FromLibxml(p->name);
const char* attribute_name = attribute.data();
const auto left_value = GetAttributeOrDie(left, attribute_name);
const auto right_value = GetAttribute(right, attribute_name);
if (!right_value || left_value != right_value.value()) {
return false;
}
}
// To also be equal, we just need to check the counts are the same.
if (also_equal) {
size_t right_attributes = 0;
for (auto* p = right->properties; p; p = p->next) {
++right_attributes;
}
if (left_attributes != right_attributes) {
return false;
}
}
// The left subelements must be a subsequence of the right ones and to also be
// equal, we must not have skipped any right ones.
xmlNodePtr left_child = Child(left);
xmlNodePtr right_child = Child(right);
while (left_child != nullptr && right_child != nullptr) {
if (SubOrEqualTree(also_equal, left_child, right_child)) {
left_child = Next(left_child);
} else if (also_equal) {
return false;
}
right_child = Next(right_child);
}
return left_child == nullptr && (right_child == nullptr || !also_equal);
}
} // namespace
// Determine whether one XML element is a subtree of another.
bool SubTree(xmlNodePtr left, xmlNodePtr right) {
return SubOrEqualTree(false, left, right);
}
// Determine whether one XML element is the same as another.
bool EqualTree(xmlNodePtr left, xmlNodePtr right) {
return SubOrEqualTree(true, left, right);
}
// Find a maximal XML element if one exists.
std::optional<size_t> MaximalTree(const std::vector<xmlNodePtr>& nodes) {
if (nodes.empty()) {
return std::nullopt;
}
// Find a potentially maximal candidate by scanning through and retaining the
// new node if it's a supertree of the current candidate.
const auto count = nodes.size();
std::vector<bool> ok(count);
size_t candidate = 0;
ok[candidate] = true;
for (size_t ix = 1; ix < count; ++ix) {
if (SubTree(nodes[candidate], nodes[ix])) {
candidate = ix;
ok[candidate] = true;
}
}
// Verify the candidate is indeed maximal by comparing it with the nodes not
// already known to be subtrees of it.
const auto& candidate_node = nodes[candidate];
for (size_t ix = 0; ix < count; ++ix) {
const auto& node = nodes[ix];
if (!ok[ix] && !SubTree(node, candidate_node)) {
return std::nullopt;
}
}
return std::make_optional(candidate);
}
namespace {
// Check if string_view is in an array.
template<size_t N>
bool Contains(const std::array<std::string_view, N>& haystack,
std::string_view needle) {
return std::find(haystack.begin(), haystack.end(), needle) != haystack.end();
}
// Remove source location attributes.
//
// This simplifies element comparison later.
void StripLocationInfo(xmlNodePtr node) {
static const std::array<std::string_view, 7> has_location_info = {
"class-decl",
"enum-decl",
"function-decl",
"parameter",
"typedef-decl",
"union-decl",
"var-decl"
};
if (Contains(has_location_info, GetName(node))) {
UnsetAttribute(node, "filepath");
UnsetAttribute(node, "line");
UnsetAttribute(node, "column");
}
for (auto* child = Child(node); child; child = Next(child)) {
StripLocationInfo(child);
}
}
// Remove access attribute.
//
// This simplifies element comparison later in a very specific way: libabigail
// (possibly older versions) uses the access specifier for the type it's trying
// to "emit in scope", even for its containing types, making deduplicating types
// trickier. We don't care about access anyway, so just remove it everywhere.
void StripAccess(xmlNodePtr node) {
static const std::array<std::string_view, 5> has_access = {
"base-class",
"data-member",
"member-function",
"member-template",
"member-type",
};
if (Contains(has_access, GetName(node))) {
UnsetAttribute(node, "access");
}
for (auto* child = Child(node); child; child = Next(child)) {
StripAccess(child);
}
}
// Elements corresponding to named types that can be anonymous or marked as
// unreachable by libabigail, so user-defined types, excepting typedefs.
const std::array<std::string_view, 3> kNamedTypes = {
"class-decl",
"enum-decl",
"union-decl",
};
// Remove attributes emitted by abidw --load-all-types.
//
// With this invocation and if any user-defined types are deemed unreachable,
// libabigail will output a tracking-non-reachable-types attribute on top-level
// elements and an is-non-reachable attribute on each such type element.
//
// We have our own graph-theoretic notion of reachability and these attributes
// have no ABI relevance and can interfere with element comparisons.
void StripReachabilityAttributes(xmlNodePtr node) {
const auto node_name = GetName(node);
if (node_name == "abi-corpus-group" || node_name == "abi-corpus") {
UnsetAttribute(node, "tracking-non-reachable-types");
} else if (Contains(kNamedTypes, node_name)) {
UnsetAttribute(node, "is-non-reachable");
}
for (auto* child = Child(node); child; child = Next(child)) {
StripReachabilityAttributes(child);
}
}
// Fix bad DWARF -> ELF links caused by size zero symbol confusion.
//
// libabigail used to be confused by these sorts of symbols, resulting in
// declarations pointing at the wrong ELF symbols:
//
// 573623: ffffffc0122383c0 256 OBJECT GLOBAL DEFAULT 33 vm_node_stat
// 573960: ffffffc0122383c0 0 OBJECT GLOBAL DEFAULT 33 vm_numa_stat
void FixBadDwarfElfLinks(xmlNodePtr root) {
std::unordered_map<std::string, size_t> elf_links;
// See which ELF symbol IDs have multiple declarations.
const std::function<void(xmlNodePtr)> count = [&](xmlNodePtr node) {
if (GetName(node) == "var-decl") {
const auto symbol_id = GetAttribute(node, "elf-symbol-id");
if (symbol_id) {
++elf_links[symbol_id.value()];
}
}
for (auto* child = Child(node); child; child = Next(child)) {
count(child);
}
};
count(root);
// Fix up likely bad links from DWARF declaration to ELF symbol.
const std::function<void(xmlNodePtr)> fix = [&](xmlNodePtr node) {
if (GetName(node) == "var-decl") {
const auto name = GetAttributeOrDie(node, "name");
const auto mangled_name = GetAttribute(node, "mangled-name");
const auto symbol_id = GetAttribute(node, "elf-symbol-id");
if (mangled_name && symbol_id && name != symbol_id.value()
&& elf_links[symbol_id.value()] > 1) {
if (mangled_name.value() == name) {
Warn() << "fixing up ELF symbol for '" << name
<< "' (was '" << symbol_id.value() << "')";
SetAttribute(node, "elf-symbol-id", name);
} else if (mangled_name.value() == symbol_id.value()) {
Warn() << "fixing up mangled name and ELF symbol for '" << name
<< "' (was '" << symbol_id.value() << "')";
SetAttribute(node, "mangled-name", name);
SetAttribute(node, "elf-symbol-id", name);
}
}
}
for (auto* child = Child(node); child; child = Next(child)) {
fix(child);
}
};
fix(root);
}
// Tidy anonymous types in various ways.
//
// 1. Normalise anonymous type names by dropping the name attribute.
//
// Anonymous type names take the form __anonymous_foo__N where foo is one of
// enum, struct or union and N is an optional numerical suffix. We don't care
// about these names but they may cause trouble when comparing elements.
//
// 2. Reanonymise anonymous types that have been given names.
//
// At some point abidw changed its behaviour given an anonymous with a naming
// typedef. In addition to linking the typedef and type in both directions, the
// code now gives (some) anonymous types the same name as the typedef. This
// misrepresents the original types.
//
// Such types should be anonymous. We set is-anonymous and drop the name.
//
// 3. Discard naming typedef backlinks.
//
// The attribute naming-typedef-id is a backwards link from an anonymous type to
// the typedef that refers to it.
//
// We don't care about these attributes and they may cause comparison issues.
void TidyAnonymousTypes(xmlNodePtr node) {
if (Contains(kNamedTypes, GetName(node))) {
const bool is_anon = ReadAttribute<bool>(node, "is-anonymous", false);
const auto naming_attribute = GetAttribute(node, "naming-typedef-id");
if (is_anon) {
UnsetAttribute(node, "name");
} else if (naming_attribute) {
SetAttribute(node, "is-anonymous", "yes");
UnsetAttribute(node, "name");
}
if (naming_attribute) {
UnsetAttribute(node, "naming-typedef-id");
}
}
for (auto* child = Child(node); child; child = Next(child)) {
TidyAnonymousTypes(child);
}
}
// Remove duplicate members.
void RemoveDuplicateMembers(xmlNodePtr root) {
std::vector<xmlNodePtr> types;
// find all structs and unions
std::function<void(xmlNodePtr)> dfs = [&](xmlNodePtr node) {
const auto node_name = GetName(node);
// preorder in case we delete a nested element
for (auto* child = Child(node); child; child = Next(child)) {
dfs(child);
}
if (node_name == "class-decl" || node_name == "union-decl") {
types.push_back(node);
}
};
dfs(root);
for (const auto& node : types) {
// partition members by node name
std::map<std::string_view, std::vector<xmlNodePtr>> member_map;
for (auto* child = Child(node); child; child = Next(child)) {
member_map[GetName(child)].push_back(child);
}
// for each kind of member...
for (auto& [name, members] : member_map) {
// ... remove identical duplicate members - O(n^2)
for (size_t i = 0; i < members.size(); ++i) {
xmlNodePtr& i_node = members[i];
bool duplicate = false;
for (size_t j = 0; j < i; ++j) {
const xmlNodePtr& j_node = members[j];
if (j_node != nullptr && EqualTree(i_node, j_node)) {
duplicate = true;
break;
}
}
if (duplicate) {
RemoveNode(i_node);
i_node = nullptr;
}
}
}
}
}
// Eliminate non-conflicting / report conflicting duplicate definitions.
//
// XML elements representing types are sometimes emitted multiple times,
// identically. Also, member typedefs are sometimes emitted separately from
// their types, resulting in duplicate XML fragments.
//
// Both these issues can be resolved by first detecting duplicate occurrences of
// a given type id and then checking to see if there's an instance that subsumes
// the others, which can then be eliminated.
//
// This function eliminates exact type duplicates and duplicates where there is
// at least one maximal definition. It can report the remaining duplicate
// definitions.
//
// If a type has duplicate definitions in multiple namespace scopes or
// definitions with different effective names, these are considered to be
// *conflicting* duplicate definitions. TODO: update text
void HandleDuplicateTypes(xmlNodePtr root) {
// Convenience typedef referring to a namespace scope.
using namespace_scope = std::vector<std::string>;
// map of type-id to pair of set of namespace scopes and vector of
// xmlNodes
std::unordered_map<
std::string,
std::pair<
std::set<namespace_scope>,
std::vector<xmlNodePtr>>> types;
namespace_scope namespaces;
// find all type occurrences
std::function<void(xmlNodePtr)> dfs = [&](xmlNodePtr node) {
const auto node_name = GetName(node);
std::optional<std::string> namespace_name;
if (node_name == "namespace-decl") {
namespace_name = GetAttribute(node, "name");
}
if (namespace_name) {
namespaces.push_back(namespace_name.value());
}
if (node_name == "abi-corpus-group"
|| node_name == "abi-corpus"
|| node_name == "abi-instr"
|| namespace_name) {
for (auto* child = Child(node); child; child = Next(child)) {
dfs(child);
}
} else {
const auto id = GetAttribute(node, "id");
if (id) {
auto& info = types[id.value()];
info.first.insert(namespaces);
info.second.push_back(node);
}
}
if (namespace_name) {
namespaces.pop_back();
}
};
dfs(root);
for (const auto& [id, scopes_and_definitions] : types) {
const auto& [scopes, definitions] = scopes_and_definitions;
if (scopes.size() > 1) {
Warn() << "conflicting scopes found for type '" << id << '\'';
continue;
}
const auto possible_maximal = MaximalTree(definitions);
if (possible_maximal) {
// Remove all but the maximal definition.
const size_t maximal = possible_maximal.value();
for (size_t ix = 0; ix < definitions.size(); ++ix) {
if (ix != maximal) {
RemoveNode(definitions[ix]);
}
}
continue;
}
// As a rare alternative, check for a stray anonymous member that has been
// separated from the main definition.
size_t strays = 0;
std::optional<size_t> stray;
std::optional<size_t> non_stray;
for (size_t ix = 0; ix < definitions.size(); ++ix) {
auto node = definitions[ix];
auto member = Child(node);
if (member && !Next(member) && GetName(member) == "data-member") {
auto decl = Child(member);
if (decl && !Next(decl) && GetName(decl) == "var-decl") {
auto name = GetAttribute(decl, "name");
if (name && name.value().empty()) {
++strays;
stray = ix;
continue;
}
}
}
non_stray = ix;
}
if (strays + 1 == definitions.size() && stray.has_value()
&& non_stray.has_value()) {
const auto stray_index = stray.value();
const auto non_stray_index = non_stray.value();
bool good = true;
for (size_t ix = 0; ix < definitions.size(); ++ix) {
if (ix == stray_index || ix == non_stray_index) {
continue;
}
if (EqualTree(definitions[stray_index], definitions[ix])) {
// it doesn't hurt if we remove exact duplicates and then fail
RemoveNode(definitions[ix]);
} else {
good = false;
break;
}
}
if (good) {
MoveNode(Child(definitions[stray_index]), definitions[non_stray_index]);
RemoveNode(definitions[stray_index]);
continue;
}
}
Warn() << "unresolvable duplicate definitions found for type '" << id
<< '\'';
}
}
} // namespace
// Remove XML nodes and attributes that are neither used or wanted.
void Clean(xmlNodePtr root) {
// Strip non-element nodes to simplify other operations.
StripNonElements(root);
// Strip location information.
StripLocationInfo(root);
// Strip access.
StripAccess(root);
// Strip reachability attributes.
StripReachabilityAttributes(root);
}
namespace {
// Transform XML elements to improve their semantics.
void Tidy(xmlNodePtr root) {
// Fix bad ELF symbol links
FixBadDwarfElfLinks(root);
// Normalise anonymous type names.
// Reanonymise anonymous types.
// Discard naming typedef backlinks.
TidyAnonymousTypes(root);
// Remove duplicate members.
RemoveDuplicateMembers(root);
// Eliminate complete duplicates and extra fragments of types.
// Report conflicting duplicate defintions.
// Record whether there are conflicting duplicate definitions.
HandleDuplicateTypes(root);
}
std::optional<uint64_t> ParseLength(const std::string& value) {
if (value == "infinite" || value == "unknown") {
return {0};
}
return Parse<uint64_t>(value);
}
std::optional<PointerReference::Kind> ParseReferenceKind(
const std::string& value) {
if (value == "lvalue") {
return {PointerReference::Kind::LVALUE_REFERENCE};
} else if (value == "rvalue") {
return {PointerReference::Kind::RVALUE_REFERENCE};
}
return {};
}
} // namespace
Abigail::Abigail(Graph& graph) : graph_(graph) {}
Id Abigail::GetNode(const std::string& type_id) {
const auto [it, inserted] = type_ids_.insert({type_id, Id(0)});
if (inserted) {
it->second = graph_.Allocate();
}
return it->second;
}
Id Abigail::GetEdge(xmlNodePtr element) {
return GetNode(GetAttributeOrDie(element, "type-id"));
}
Id Abigail::GetVariadic() {
if (!variadic_) {
variadic_ = {graph_.Add<Special>(Special::Kind::VARIADIC)};
}
return *variadic_;
}
Function Abigail::MakeFunctionType(xmlNodePtr function) {
std::vector<Id> parameters;
std::optional<Id> return_type;
for (auto* child = Child(function); child; child = Next(child)) {
const auto child_name = GetName(child);
if (return_type) {
Die() << "unexpected element after return-type";
}
if (child_name == "parameter") {
const auto is_variadic = ReadAttribute<bool>(child, "is-variadic", false);
parameters.push_back(is_variadic ? GetVariadic() : GetEdge(child));
} else if (child_name == "return") {
return_type = {GetEdge(child)};
} else {
Die() << "unrecognised " << GetName(function)
<< " child element '" << child_name << "'";
}
}
if (!return_type) {
Die() << "missing return-type";
}
return Function(*return_type, parameters);
}
Id Abigail::ProcessRoot(xmlNodePtr root) {
Clean(root);
Tidy(root);
const auto name = GetName(root);
if (name == "abi-corpus-group") {
ProcessCorpusGroup(root);
} else if (name == "abi-corpus") {
ProcessCorpus(root);
} else {
Die() << "unrecognised root element '" << name << "'";
}
for (const auto& [type_id, id] : type_ids_) {
if (!graph_.Is(id)) {
Warn() << "no definition found for type '" << type_id << "'";
}
}
const Id id = BuildSymbols();
RemoveUselessQualifiers(graph_, id);
return id;
}
void Abigail::ProcessCorpusGroup(xmlNodePtr group) {
for (auto* corpus = Child(group); corpus; corpus = Next(corpus)) {
CheckName("abi-corpus", corpus);
ProcessCorpus(corpus);
}
}
void Abigail::ProcessCorpus(xmlNodePtr corpus) {
for (auto* element = Child(corpus); element; element = Next(element)) {
const auto name = GetName(element);
if (name == "elf-function-symbols" || name == "elf-variable-symbols") {
ProcessSymbols(element);
} else if (name == "elf-needed") {
// ignore this
} else if (name == "abi-instr") {
ProcessInstr(element);
} else {
Die() << "unrecognised abi-corpus child element '" << name << "'";
}
}
}
void Abigail::ProcessSymbols(xmlNodePtr symbols) {
for (auto* element = Child(symbols); element; element = Next(element)) {
CheckName("elf-symbol", element);
ProcessSymbol(element);
}
}
void Abigail::ProcessSymbol(xmlNodePtr symbol) {
// Symbol processing is done in two parts. In this first part, we parse just
// enough XML attributes to generate a symbol id and determine any aliases.
// Symbol ids in this format can be found in elf-symbol alias attributes and
// in {var,function}-decl elf-symbol-id attributes.
const auto name = GetAttributeOrDie(symbol, "name");
const auto version =
ReadAttribute<std::string>(symbol, "version", std::string());
const bool is_default_version =
ReadAttribute<bool>(symbol, "is-default-version", false);
const auto alias = GetAttribute(symbol, "alias");
std::string elf_symbol_id = name;
std::optional<ElfSymbol::VersionInfo> version_info;
if (!version.empty()) {
version_info = ElfSymbol::VersionInfo{is_default_version, version};
elf_symbol_id += VersionInfoToString(*version_info);
}
Check(symbol_info_map_
.emplace(elf_symbol_id, SymbolInfo{name, version_info, symbol})
.second)
<< "multiple symbols with id " << elf_symbol_id;
if (alias) {
std::istringstream is(*alias);
std::string item;
while (std::getline(is, item, ',')) {
Check(alias_to_main_.insert({item, elf_symbol_id}).second)
<< "multiple aliases with id " << elf_symbol_id;
}
}
}
bool Abigail::ProcessUserDefinedType(std::string_view name, Id id,
xmlNodePtr decl) {
if (name == "typedef-decl") {
ProcessTypedef(id, decl);
} else if (name == "class-decl") {
ProcessStructUnion(id, true, decl);
} else if (name == "union-decl") {
ProcessStructUnion(id, false, decl);
} else if (name == "enum-decl") {
ProcessEnum(id, decl);
} else {
return false;
}
return true;
}
void Abigail::ProcessScope(xmlNodePtr scope) {
for (auto* element = Child(scope); element; element = Next(element)) {
const auto name = GetName(element);
const auto type_id = GetAttribute(element, "id");
// all type elements have "id", all non-types do not
if (type_id) {
const auto id = GetNode(*type_id);
if (graph_.Is(id)) {
Warn() << "duplicate definition of type '" << *type_id << '\'';
continue;
}
if (name == "function-type") {
ProcessFunctionType(id, element);
} else if (name == "pointer-type-def") {
ProcessPointer(id, true, element);
} else if (name == "reference-type-def") {
ProcessPointer(id, false, element);
} else if (name == "qualified-type-def") {
ProcessQualified(id, element);
} else if (name == "array-type-def") {
ProcessArray(id, element);
} else if (name == "type-decl") {
ProcessTypeDecl(id, element);
} else if (!ProcessUserDefinedType(name, id, element)) {
Die() << "bad abi-instr type child element '" << name << "'";
}
} else {
if (name == "var-decl") {
ProcessDecl(true, element);
} else if (name == "function-decl") {
ProcessDecl(false, element);
} else if (name == "namespace-decl") {
ProcessNamespace(element);
} else {
Die() << "bad abi-instr non-type child element '" << name << "'";
}
}
}
}
void Abigail::ProcessInstr(xmlNodePtr instr) {
ProcessScope(instr);
}
void Abigail::ProcessNamespace(xmlNodePtr scope) {
const auto name = GetAttributeOrDie(scope, "name");
const PushScopeName push_scope_name(scope_name_, "namespace", name);
ProcessScope(scope);
}
Id Abigail::ProcessDecl(bool is_variable, xmlNodePtr decl) {
const auto name = scope_name_ + GetAttributeOrDie(decl, "name");
const auto symbol_id = GetAttribute(decl, "elf-symbol-id");
const auto type = is_variable ? GetEdge(decl)
: graph_.Add<Function>(MakeFunctionType(decl));
if (symbol_id) {
// There's a link to an ELF symbol.
const auto [it, inserted] = symbol_id_and_full_name_.emplace(
*symbol_id, std::make_pair(type, name));
if (!inserted) {
Die() << "duplicate type for '" << *symbol_id << "'";
}
}
return type;
}
void Abigail::ProcessFunctionType(Id id, xmlNodePtr function) {
graph_.Set<Function>(id, MakeFunctionType(function));
}
void Abigail::ProcessTypedef(Id id, xmlNodePtr type_definition) {
const auto name = scope_name_ + GetAttributeOrDie(type_definition, "name");
const auto type = GetEdge(type_definition);
graph_.Set<Typedef>(id, name, type);
}
void Abigail::ProcessPointer(Id id, bool is_pointer, xmlNodePtr pointer) {
const auto type = GetEdge(pointer);
const auto kind = is_pointer ? PointerReference::Kind::POINTER
: ReadAttribute<PointerReference::Kind>(
pointer, "kind", &ParseReferenceKind);
graph_.Set<PointerReference>(id, kind, type);
}
void Abigail::ProcessQualified(Id id, xmlNodePtr qualified) {
std::vector<Qualifier> qualifiers;
// Do these in reverse order so we get CVR ordering.
if (ReadAttribute<bool>(qualified, "restrict", false)) {
qualifiers.push_back(Qualifier::RESTRICT);
}
if (ReadAttribute<bool>(qualified, "volatile", false)) {
qualifiers.push_back(Qualifier::VOLATILE);
}
if (ReadAttribute<bool>(qualified, "const", false)) {
qualifiers.push_back(Qualifier::CONST);
}
Check(!qualifiers.empty()) << "qualified-type-def has no qualifiers";
// Handle multiple qualifiers by unconditionally adding as new nodes all but
// the last qualifier which is set into place.
auto type = GetEdge(qualified);
auto count = qualifiers.size();
for (auto qualifier : qualifiers) {
--count;
const Qualified node(qualifier, type);
if (count) {
type = graph_.Add<Qualified>(node);
} else {
graph_.Set<Qualified>(id, node);
}
}
}
void Abigail::ProcessArray(Id id, xmlNodePtr array) {
std::vector<size_t> dimensions;
for (auto* child = Child(array); child; child = Next(child)) {
CheckName("subrange", child);
const auto length = ReadAttribute<uint64_t>(child, "length", &ParseLength);
dimensions.push_back(length);
}
Check(!dimensions.empty()) << "array-type-def element has no children";
// int[M][N] means array[M] of array[N] of int
//
// We need to chain a bunch of types together:
//
// id = array[n] of id = ... = array[n] of id
//
// where the first id is the new type in slot ix
// and the last id is the old type in slot type
//
// Use the same approach as for qualifiers.
auto type = GetEdge(array);
auto count = dimensions.size();
for (auto it = dimensions.crbegin(); it != dimensions.crend(); ++it) {
--count;
const auto size = *it;
const Array node(size, type);
if (count) {
type = graph_.Add<Array>(node);
} else {
graph_.Set<Array>(id, node);
}
}
}
void Abigail::ProcessTypeDecl(Id id, xmlNodePtr type_decl) {
const auto name = scope_name_ + GetAttributeOrDie(type_decl, "name");
const auto bits = ReadAttribute<size_t>(type_decl, "size-in-bits", 0);
if (bits % 8) {
Die() << "size-in-bits is not a multiple of 8";
}
const auto bytes = bits / 8;
if (name == "void") {
graph_.Set<Special>(id, Special::Kind::VOID);
} else {
// libabigail doesn't model encoding at all and we don't want to parse names
// (which will not always work) in an attempt to reconstruct it.
graph_.Set<Primitive>(id, name, /* encoding= */ std::nullopt, bytes);
}
}
void Abigail::ProcessStructUnion(Id id, bool is_struct,
xmlNodePtr struct_union) {
// Libabigail sometimes reports is-declaration-only but still provides some
// child elements. So we check both things.
const bool forward =
ReadAttribute<bool>(struct_union, "is-declaration-only", false)
&& Child(struct_union) == nullptr;
const auto kind = is_struct
? StructUnion::Kind::STRUCT
: StructUnion::Kind::UNION;
const bool is_anonymous =
ReadAttribute<bool>(struct_union, "is-anonymous", false);
const auto name =
is_anonymous ? std::string() : GetAttributeOrDie(struct_union, "name");
const auto full_name =
is_anonymous ? std::string() : scope_name_ + name;
const PushScopeName push_scope_name(scope_name_, kind, name);
if (forward) {
graph_.Set<StructUnion>(id, kind, full_name);
return;
}
const auto bits = ReadAttribute<size_t>(struct_union, "size-in-bits", 0);
const auto bytes = (bits + 7) / 8;
std::vector<Id> base_classes;
std::vector<Id> methods;
std::vector<Id> members;
for (auto* child = Child(struct_union); child; child = Next(child)) {
const auto child_name = GetName(child);
if (child_name == "data-member") {
if (const auto member = ProcessDataMember(is_struct, child)) {
members.push_back(*member);
}
} else if (child_name == "member-type") {
ProcessMemberType(child);
} else if (child_name == "base-class") {
base_classes.push_back(ProcessBaseClass(child));
} else if (child_name == "member-function") {
ProcessMemberFunction(methods, child);
} else {
Die() << "unrecognised " << kind << "-decl child element '" << child_name
<< "'";
}
}
graph_.Set<StructUnion>(id, kind, full_name, bytes, base_classes, methods,
members);
}
void Abigail::ProcessEnum(Id id, xmlNodePtr enumeration) {
bool forward = ReadAttribute<bool>(enumeration, "is-declaration-only", false);
const auto name = ReadAttribute<bool>(enumeration, "is-anonymous", false)
? std::string()
: scope_name_ + GetAttributeOrDie(enumeration, "name");
if (forward) {
graph_.Set<Enumeration>(id, name);
return;
}
xmlNodePtr underlying = Child(enumeration);
Check(underlying) << "enum-decl has no child elements";
CheckName("underlying-type", underlying);
const auto type = GetEdge(underlying);
std::vector<std::pair<std::string, int64_t>> enumerators;
for (auto* enumerator = Next(underlying); enumerator;
enumerator = Next(enumerator)) {
CheckName("enumerator", enumerator);
const auto enumerator_name = GetAttributeOrDie(enumerator, "name");
// libabigail currently supports anything that fits in an int64_t
const auto enumerator_value =
ReadAttributeOrDie<int64_t>(enumerator, "value");
enumerators.emplace_back(enumerator_name, enumerator_value);
}
graph_.Set<Enumeration>(id, name, type, enumerators);
}
Id Abigail::ProcessBaseClass(xmlNodePtr base_class) {
const auto& type = GetEdge(base_class);
const auto offset =
ReadAttributeOrDie<size_t>(base_class, "layout-offset-in-bits");
const auto inheritance = ReadAttribute<bool>(base_class, "is-virtual", false)
? BaseClass::Inheritance::VIRTUAL
: BaseClass::Inheritance::NON_VIRTUAL;
return graph_.Add<BaseClass>(type, offset, inheritance);
}
std::optional<Id> Abigail::ProcessDataMember(bool is_struct,
xmlNodePtr data_member) {
xmlNodePtr decl = GetOnlyChild(data_member);
CheckName("var-decl", decl);
if (ReadAttribute<bool>(data_member, "static", false)) {
ProcessDecl(true, decl);
return {};
}
size_t offset = is_struct
? ReadAttributeOrDie<size_t>(data_member, "layout-offset-in-bits")
: 0;
const auto name = GetAttributeOrDie(decl, "name");
const auto type = GetEdge(decl);
// Note: libabigail does not model member size, yet
return {graph_.Add<Member>(name, type, offset, 0)};
}
void Abigail::ProcessMemberFunction(std::vector<Id>& methods,
xmlNodePtr method) {
xmlNodePtr decl = GetOnlyChild(method);
CheckName("function-decl", decl);
// ProcessDecl creates symbol references so must be called unconditionally.
const auto type = ProcessDecl(false, decl);
const auto vtable_offset = ReadAttribute<uint64_t>(method, "vtable-offset");
if (vtable_offset) {
static const std::string missing = "{missing}";
const auto mangled_name = ReadAttribute(decl, "mangled-name", missing);
const auto name = GetAttributeOrDie(decl, "name");
methods.push_back(
graph_.Add<Method>(mangled_name, name, vtable_offset.value(), type));
}
}
void Abigail::ProcessMemberType(xmlNodePtr member_type) {
xmlNodePtr decl = GetOnlyChild(member_type);
const auto type_id = GetAttributeOrDie(decl, "id");
const auto id = GetNode(type_id);
if (graph_.Is(id)) {
Warn() << "duplicate definition of member type '" << type_id << '\'';
return;
}
const auto name = GetName(decl);
if (!ProcessUserDefinedType(name, id, decl)) {
Die() << "unrecognised member-type child element '" << name << "'";
}
}
Id Abigail::BuildSymbol(const SymbolInfo& info,
std::optional<Id> type_id,
const std::optional<std::string>& name) {
const xmlNodePtr symbol = info.node;
const bool is_defined = ReadAttributeOrDie<bool>(symbol, "is-defined");
const auto crc = ReadAttribute<ElfSymbol::CRC>(symbol, "crc");
const auto ns = ReadAttribute<std::string>(symbol, "namespace");
const auto type = ReadAttributeOrDie<ElfSymbol::SymbolType>(symbol, "type");
const auto binding =
ReadAttributeOrDie<ElfSymbol::Binding>(symbol, "binding");
const auto visibility =
ReadAttributeOrDie<ElfSymbol::Visibility>(symbol, "visibility");
return graph_.Add<ElfSymbol>(
info.name, info.version_info,
is_defined, type, binding, visibility, crc, ns, type_id, name);
}
Id Abigail::BuildSymbols() {
// Libabigail's model is (approximately):
//
// (alias)* -> main symbol <- some decl -> type
//
// which we turn into:
//
// symbol / alias -> type
//
for (const auto& [alias, main] : alias_to_main_) {
Check(!alias_to_main_.count(main))
<< "found main symbol and alias with id " << main;
}
// Build final symbol table, tying symbols to their types.
std::map<std::string, Id> symbols;
for (const auto& [id, symbol_info] : symbol_info_map_) {
const auto main = alias_to_main_.find(id);
const auto lookup = main != alias_to_main_.end() ? main->second : id;
const auto type_id_and_name_it = symbol_id_and_full_name_.find(lookup);
std::optional<Id> type_id;
std::optional<std::string> name;
if (type_id_and_name_it != symbol_id_and_full_name_.end()) {
const auto& type_id_and_name = type_id_and_name_it->second;
type_id = {type_id_and_name.first};
name = {type_id_and_name.second};
}
symbols.insert({id, BuildSymbol(symbol_info, type_id, name)});
}
return graph_.Add<Interface>(symbols);
}
Document Read(const std::string& path, Metrics& metrics) {
// Open input for reading.
FileDescriptor fd(path.c_str(), O_RDONLY);
// Read the XML.
Document document(nullptr, xmlFreeDoc);
{
Time t(metrics, "abigail.libxml_parse");
std::unique_ptr<
std::remove_pointer_t<xmlParserCtxtPtr>, void(*)(xmlParserCtxtPtr)>
context(xmlNewParserCtxt(), xmlFreeParserCtxt);
document.reset(
xmlCtxtReadFd(context.get(), fd.Value(), nullptr, nullptr,
XML_PARSE_NONET));
}
Check(document != nullptr) << "failed to parse input as XML";
return document;
}
Id Read(Graph& graph, const std::string& path, Metrics& metrics) {
const Document document = Read(path, metrics);
xmlNodePtr root = xmlDocGetRootElement(document.get());
Check(root) << "XML document has no root element";
return Abigail(graph).ProcessRoot(root);
}
} // namespace abixml
} // namespace stg