// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// -*- mode: C++ -*-
//
// Copyright 2022-2023 Google LLC
//
// Licensed under the Apache License v2.0 with LLVM Exceptions (the
// "License"); you may not use this file except in compliance with the
// License.  You may obtain a copy of the License at
//
//     https://llvm.org/LICENSE.txt
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: Aleksei Vetrov

#include "dwarf_processor.h"

#include <dwarf.h>
#include <elfutils/libdw.h>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <optional>
#include <sstream>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

#include "dwarf_wrappers.h"
#include "error.h"
#include "filter.h"
#include "hex.h"
#include "graph.h"
#include "scope.h"

namespace stg {
namespace dwarf {

namespace {

bool HasIncompleteTypes(uint64_t language) {
  return language != DW_LANG_Rust;
}

std::string EntryToString(Entry& entry) {
  std::ostringstream os;
  os << "DWARF entry <" << Hex(entry.GetOffset()) << ">";
  return os.str();
}

std::optional<std::string> MaybeGetName(Entry& entry) {
  return entry.MaybeGetString(DW_AT_name);
}

std::string GetName(Entry& entry) {
  auto result = MaybeGetName(entry);
  if (!result.has_value()) {
    Die() << "Name was not found for " << EntryToString(entry);
  }
  return std::move(*result);
}

std::string GetNameOrEmpty(Entry& entry) {
  auto result = MaybeGetName(entry);
  if (!result.has_value()) {
    return {};
  }
  return std::move(*result);
}

std::string GetLinkageName(int version, Entry& entry) {
  auto linkage_name = entry.MaybeGetString(
      version < 4 ? DW_AT_MIPS_linkage_name : DW_AT_linkage_name);
  if (linkage_name.has_value()) {
    return std::move(*linkage_name);
  }
  return GetNameOrEmpty(entry);
}

size_t GetBitSize(Entry& entry) {
  if (auto byte_size = entry.MaybeGetUnsignedConstant(DW_AT_byte_size)) {
    return *byte_size * 8;
  } else if (auto bit_size = entry.MaybeGetUnsignedConstant(DW_AT_bit_size)) {
    return *bit_size;
  }
  Die() << "Bit size was not found for " << EntryToString(entry);
}

size_t GetByteSize(Entry& entry) {
  if (auto byte_size = entry.MaybeGetUnsignedConstant(DW_AT_byte_size)) {
    return *byte_size;
  } else if (auto bit_size = entry.MaybeGetUnsignedConstant(DW_AT_bit_size)) {
    // Round up bit_size / 8 to get minimal needed storage size in bytes.
    return (*bit_size + 7) / 8;
  }
  Die() << "Byte size was not found for " << EntryToString(entry);
}

Primitive::Encoding GetEncoding(Entry& entry) {
  const auto dwarf_encoding = entry.MaybeGetUnsignedConstant(DW_AT_encoding);
  if (!dwarf_encoding) {
    Die() << "Encoding was not found for " << EntryToString(entry);
  }
  switch (*dwarf_encoding) {
    case DW_ATE_boolean:
      return Primitive::Encoding::BOOLEAN;
    case DW_ATE_complex_float:
      return Primitive::Encoding::COMPLEX_NUMBER;
    case DW_ATE_float:
      return Primitive::Encoding::REAL_NUMBER;
    case DW_ATE_signed:
      return Primitive::Encoding::SIGNED_INTEGER;
    case DW_ATE_signed_char:
      return Primitive::Encoding::SIGNED_CHARACTER;
    case DW_ATE_unsigned:
      return Primitive::Encoding::UNSIGNED_INTEGER;
    case DW_ATE_unsigned_char:
      return Primitive::Encoding::UNSIGNED_CHARACTER;
    case DW_ATE_UTF:
      return Primitive::Encoding::UTF;
    default:
      Die() << "Unknown encoding " << Hex(*dwarf_encoding) << " for "
            << EntryToString(entry);
  }
}

std::optional<Entry> MaybeGetReferredType(Entry& entry) {
  return entry.MaybeGetReference(DW_AT_type);
}

Entry GetReferredType(Entry& entry) {
  auto result = MaybeGetReferredType(entry);
  if (!result.has_value()) {
    Die() << "Type reference was not found in " << EntryToString(entry);
  }
  return *result;
}

size_t GetNumberOfElements(Entry& entry) {
  // DWARF standard says, that array dimensions could be an entry with
  // either DW_TAG_subrange_type or DW_TAG_enumeration_type. However, this
  // code supports only the DW_TAG_subrange_type.
  Check(entry.GetTag() == DW_TAG_subrange_type)
      << "Array's dimensions should be an entry of DW_TAG_subrange_type";
  std::optional<size_t> number_of_elements = entry.MaybeGetCount();
  if (number_of_elements) {
    return *number_of_elements;
  }
  // If a subrange has no DW_AT_count and no DW_AT_upper_bound attribute, its
  // size is unknown.
  return 0;
}

// Calculate number of bits from the "beginning" of the containing entity to
// the "beginning" of the data member using DW_AT_bit_offset.
//
// "Number of bits from the beginning", depends on the definition of the
// "beginning", which is different for big- and little-endian architectures.
// However, DW_AT_bit_offset is defined from the high order bit of the storage
// unit to the high order bit of a field and is the same for both architectures.

// So this function converts DW_AT_bit_offset to the "number of bits from the
// beginning".
size_t CalculateBitfieldAdjustment(Entry& entry, size_t bit_size,
                             bool is_little_endian_binary) {
  if (bit_size == 0) {
    // bit_size == 0 marks that it is not a bit field. No adjustment needed.
    return 0;
  }
  auto container_byte_size = entry.MaybeGetUnsignedConstant(DW_AT_byte_size);
  auto bit_offset = entry.MaybeGetUnsignedConstant(DW_AT_bit_offset);
  Check(container_byte_size.has_value() && bit_offset.has_value())
      << "If member offset is defined as DW_AT_data_member_location, bit field "
         "should have DW_AT_byte_size and DW_AT_bit_offset";
  // The following structure will be used as an example in the explanations:
  // struct foo {
  //   uint16_t rest_of_the_struct;
  //   uint16_t x : 5;
  //   uint16_t y : 6;
  //   uint16_t z : 5;
  // };
  if (is_little_endian_binary) {
    // Compiler usualy packs bit fields starting with the least significant
    // bits, but DW_AT_bit_offset is counted from high to low bits:
    //
    // rest of the struct|<    container   >
    //    Container bits: 01234|56789A|BCDEF
    //  Bit-fields' bits: 01234|012345|01234
    //        bit_offset: <<<<B<<<<<<5<<<<<0
    //   bits from start: 0>>>>>5>>>>>>B>>>>
    //                    <x:5>|< y:6>|<z:5>
    //
    //   x.bit_offset: 11 (0xB) bits
    //   y.bit_offset: 5 bits
    //   z.bit_offset: 0 bits
    //
    // So we need to subtract bit_offset from the container bit size
    // (container_byte_size * 8) to inverse direction. Also we need to convert
    // from high- to low-order bit, because the field "begins" with low-order
    // bit. To do so we need to subtract field's bit size. Resulting formula is:
    //
    //   container_byte_size * 8 - bit_offset - bit_size
    //
    // If we try it on example, we get correct values:
    //   x: 2 * 8 - 11 - 5 = 0
    //   y: 2 * 8 - 5 - 6 = 5
    //   z: 2 * 8 - 0 - 5 = 11 (0xB)
    return *container_byte_size * 8 - *bit_offset - bit_size;
  }
  // Big-endian orders begins with high-order bit and the bit_offset is from the
  // high order bit:
  //
  // rest of the struct|<    container   >
  //    Container bits: FEDCB|A98765|43210
  //  Bit-fields' bits: 43210|543210|43210
  //        bit_offset: 0>>>>>5>>>>>>B>>>>
  //   bits from start: 0>>>>>5>>>>>>B>>>>
  //                    <x:5>|< y:6>|<z:5>
  //
  // So we just return bit_offset.
  return *bit_offset;
}

// Calculate the number of bits from the beginning of the structure to the
// beginning of the data member.
size_t GetDataBitOffset(Entry& entry, size_t bit_size,
                        bool is_little_endian_binary) {
  // Offset may be represented either by DW_AT_data_bit_offset (in bits) or by
  // DW_AT_data_member_location (in bytes).
  if (auto data_bit_offset =
          entry.MaybeGetUnsignedConstant(DW_AT_data_bit_offset)) {
    // DW_AT_data_bit_offset contains what this function needs for any type
    // of member (bitfield or not) on architecture of any endianness.
    return *data_bit_offset;
  } else if (auto byte_offset = entry.MaybeGetMemberByteOffset()) {
    // DW_AT_data_member_location contains offset in bytes.
    const size_t bit_offset = *byte_offset * 8;
    // But there can be offset part, coming from DW_AT_bit_offset. DWARF 5
    // standard requires to use DW_AT_data_bit_offset in this case, but a lot
    // of binaries still use combination of DW_AT_data_member_location and
    // DW_AT_bit_offset.
    const size_t bitfield_adjusment =
        CalculateBitfieldAdjustment(entry, bit_size, is_little_endian_binary);
    return bit_offset + bitfield_adjusment;
  } else {
    // If the beginning of the data member is the same as the beginning of the
    // containing entity then neither attribute is required.
    return 0;
  }
}

}  // namespace

// Transforms DWARF entries to STG.
class Processor {
 public:
  Processor(Graph& graph, Id void_id, Id variadic_id,
            bool is_little_endian_binary,
            const std::unique_ptr<Filter>& file_filter, Types& result)
      : maker_(graph),
        void_id_(void_id),
        variadic_id_(variadic_id),
        is_little_endian_binary_(is_little_endian_binary),
        file_filter_(file_filter),
        result_(result) {}

  void ProcessCompilationUnit(CompilationUnit& compilation_unit) {
    version_ = compilation_unit.version;
    if (file_filter_ != nullptr) {
      files_ = dwarf::Files(compilation_unit.entry);
    }
    Process(compilation_unit.entry);
  }

  void ResolveSymbolSpecifications() {
    std::sort(unresolved_symbol_specifications_.begin(),
              unresolved_symbol_specifications_.end());
    std::sort(scoped_names_.begin(), scoped_names_.end());
    auto symbols_it = unresolved_symbol_specifications_.begin();
    auto names_it = scoped_names_.begin();
    while (symbols_it != unresolved_symbol_specifications_.end()) {
      while (names_it != scoped_names_.end() &&
             names_it->first < symbols_it->first) {
        ++names_it;
      }
      if (names_it == scoped_names_.end() ||
          names_it->first != symbols_it->first) {
        Die() << "Scoped name not found for entry " << Hex(symbols_it->first);
      }
      result_.symbols[symbols_it->second].scoped_name = names_it->second;
      ++symbols_it;
    }
  }

 private:
  void Process(Entry& entry) {
    try {
      return ProcessInternal(entry);
    } catch (Exception& e) {
      std::ostringstream os;
      os << "processing DIE " << Hex(entry.GetOffset());
      e.Add(os.str());
      throw;
    }
  }

  void ProcessInternal(Entry& entry) {
    ++result_.processed_entries;
    const auto tag = entry.GetTag();
    switch (tag) {
      case DW_TAG_array_type:
        ProcessArray(entry);
        break;
      case DW_TAG_enumeration_type:
        ProcessEnum(entry);
        break;
      case DW_TAG_class_type:
        ProcessStructUnion(entry, StructUnion::Kind::STRUCT);
        break;
      case DW_TAG_structure_type:
        ProcessStructUnion(entry, StructUnion::Kind::STRUCT);
        break;
      case DW_TAG_union_type:
        ProcessStructUnion(entry, StructUnion::Kind::UNION);
        break;
      case DW_TAG_member:
        Die() << "DW_TAG_member outside of struct/class/union";
        break;
      case DW_TAG_pointer_type:
        ProcessReference<PointerReference>(
            entry, PointerReference::Kind::POINTER);
        break;
      case DW_TAG_reference_type:
        ProcessReference<PointerReference>(
            entry, PointerReference::Kind::LVALUE_REFERENCE);
        break;
      case DW_TAG_rvalue_reference_type:
        ProcessReference<PointerReference>(
            entry, PointerReference::Kind::RVALUE_REFERENCE);
        break;
      case DW_TAG_ptr_to_member_type:
        ProcessPointerToMember(entry);
        break;
      case DW_TAG_unspecified_type:
        ProcessUnspecifiedType(entry);
        break;
      case DW_TAG_compile_unit:
        language_ = entry.MustGetUnsignedConstant(DW_AT_language);
        ProcessAllChildren(entry);
        break;
      case DW_TAG_typedef:
        ProcessTypedef(entry);
        break;
      case DW_TAG_base_type:
        ProcessBaseType(entry);
        break;
      case DW_TAG_const_type:
        ProcessReference<Qualified>(entry, Qualifier::CONST);
        break;
      case DW_TAG_volatile_type:
        ProcessReference<Qualified>(entry, Qualifier::VOLATILE);
        break;
      case DW_TAG_restrict_type:
        ProcessReference<Qualified>(entry, Qualifier::RESTRICT);
        break;
      case DW_TAG_atomic_type:
        // TODO: test pending BTF / test suite support
        ProcessReference<Qualified>(entry, Qualifier::ATOMIC);
        break;
      case DW_TAG_variable:
        // Process only variables visible externally
        if (entry.GetFlag(DW_AT_external)) {
          ProcessVariable(entry);
        }
        break;
      case DW_TAG_subroutine_type:
        // Standalone function type, for example, used in function pointers.
        ProcessFunction(entry);
        break;
      case DW_TAG_subprogram:
        // DWARF equivalent of ELF function symbol.
        ProcessFunction(entry);
        break;
      case DW_TAG_namespace:
        ProcessNamespace(entry);
        break;
      case DW_TAG_lexical_block:
        ProcessAllChildren(entry);
        break;

      default:
        // TODO: die on unexpected tag, when this switch contains
        // all expected tags
        break;
    }
  }

  void ProcessAllChildren(Entry& entry) {
    for (auto& child : entry.GetChildren()) {
      Process(child);
    }
  }

  void CheckNoChildren(Entry& entry) {
    if (!entry.GetChildren().empty()) {
      Die() << "Entry expected to have no children";
    }
  }

  void ProcessNamespace(Entry& entry) {
    const auto name = GetNameOrEmpty(entry);
    const PushScopeName push_scope_name(scope_, "namespace", name);
    ProcessAllChildren(entry);
  }

  void ProcessBaseType(Entry& entry) {
    CheckNoChildren(entry);
    const auto type_name = GetName(entry);
    const size_t bit_size = GetBitSize(entry);
    if (bit_size % 8) {
      Die() << "type '" << type_name << "' size is not a multiple of 8";
    }
    const size_t byte_size = bit_size / 8;
    AddProcessedNode<Primitive>(entry, type_name, GetEncoding(entry),
                                byte_size);
  }

  void ProcessTypedef(Entry& entry) {
    const auto type_name = GetName(entry);
    const auto full_name = scope_.name + type_name;
    const Id referred_type_id = GetReferredTypeId(MaybeGetReferredType(entry));
    const Id id = AddProcessedNode<Typedef>(entry, full_name, referred_type_id);
    if (!ShouldKeepDefinition(entry, type_name)) {
      // We always model (and keep) typedef definitions. But we should exclude
      // filtered out types from being type roots.
      return;
    }
    AddNamedTypeNode(id);
  }

  template<typename Node, typename KindType>
  void ProcessReference(Entry& entry, KindType kind) {
    const Id referred_type_id = GetReferredTypeId(MaybeGetReferredType(entry));
    AddProcessedNode<Node>(entry, kind, referred_type_id);
  }

  void ProcessPointerToMember(Entry& entry) {
    const Id containing_type_id =
        GetReferredTypeId(entry.MaybeGetReference(DW_AT_containing_type));
    const Id pointee_type_id = GetReferredTypeId(MaybeGetReferredType(entry));
    AddProcessedNode<PointerToMember>(entry, containing_type_id,
                                      pointee_type_id);
  }

  void ProcessUnspecifiedType(Entry& entry) {
    const std::string type_name =  GetName(entry);
    Check(type_name == "decltype(nullptr)")
        << "Unsupported DW_TAG_unspecified_type: " << type_name;
    AddProcessedNode<Special>(entry, Special::Kind::NULLPTR);
  }

  bool ShouldKeepDefinition(Entry& entry, const std::string& name) const {
    if (!HasIncompleteTypes(language_) || file_filter_ == nullptr) {
      return true;
    }
    const auto file = files_.MaybeGetFile(entry, DW_AT_decl_file);
    if (!file) {
      // Built in types that do not have DW_AT_decl_file should be preserved.
      static constexpr std::string_view kBuiltinPrefix = "__";
      if (name.starts_with(kBuiltinPrefix)) {
        return true;
      }
      Die() << "File filter is provided, but " << name << " ("
            << EntryToString(entry) << ") doesn't have DW_AT_decl_file";
    }
    return (*file_filter_)(*file);
  }

  void ProcessStructUnion(Entry& entry, StructUnion::Kind kind) {
    const auto type_name = GetNameOrEmpty(entry);
    const auto full_name =
        type_name.empty() ? type_name : scope_.name + type_name;
    const PushScopeName push_scope_name(scope_, kind, type_name);

    std::vector<Id> base_classes;
    std::vector<Id> members;
    std::vector<Id> methods;
    std::optional<VariantAndMembers> variant_and_members = std::nullopt;

    for (auto& child : entry.GetChildren()) {
      auto child_tag = child.GetTag();
      // All possible children of struct/class/union
      switch (child_tag) {
        case DW_TAG_member:
          if (child.GetFlag(DW_AT_external)) {
            // static members are interpreted as variables and not included in
            // members.
            ProcessVariable(child);
          } else {
            members.push_back(GetIdForEntry(child));
            ProcessMember(child);
          }
          break;
        case DW_TAG_subprogram:
          ProcessMethod(methods, child);
          break;
        case DW_TAG_inheritance:
          base_classes.push_back(GetIdForEntry(child));
          ProcessBaseClass(child);
          break;
        case DW_TAG_structure_type:
        case DW_TAG_class_type:
        case DW_TAG_union_type:
        case DW_TAG_enumeration_type:
        case DW_TAG_typedef:
        case DW_TAG_const_type:
        case DW_TAG_volatile_type:
        case DW_TAG_restrict_type:
        case DW_TAG_atomic_type:
        case DW_TAG_array_type:
        case DW_TAG_pointer_type:
        case DW_TAG_reference_type:
        case DW_TAG_rvalue_reference_type:
        case DW_TAG_ptr_to_member_type:
        case DW_TAG_unspecified_type:
        case DW_TAG_variable:
          Process(child);
          break;
        case DW_TAG_imported_declaration:
        case DW_TAG_imported_module:
          // For now information there is useless for ABI monitoring, but we
          // need to check that there is no missing information in descendants.
          CheckNoChildren(child);
          break;
        case DW_TAG_template_type_parameter:
        case DW_TAG_template_value_parameter:
        case DW_TAG_GNU_template_template_param:
        case DW_TAG_GNU_template_parameter_pack:
          // We just skip these as neither GCC nor Clang seem to use them
          // properly (resulting in no references to such DIEs).
          break;
        case DW_TAG_variant_part:
          if (full_name.empty()) {
            Die() << "Variant name should not be empty: "
                  << EntryToString(entry);
          }
          variant_and_members = GetVariantAndMembers(child);
          break;
        default:
          Die() << "Unexpected tag for child of struct/class/union: "
                << Hex(child_tag) << ", " << EntryToString(child);
      }
    }

    if (variant_and_members.has_value()) {
      // Add a Variant node since this entry represents a variant rather than a
      // struct or union.
      const Id id =
          AddProcessedNode<Variant>(entry, full_name, GetByteSize(entry),
                                    variant_and_members->discriminant,
                                    std::move(variant_and_members->members));
      AddNamedTypeNode(id);
      return;
    }

    if (entry.GetFlag(DW_AT_declaration) ||
        !ShouldKeepDefinition(entry, type_name)) {
      // Declaration may have partial information about members or method.
      // We only need to parse children for information that will be needed in
      // complete definition, but don't need to store them in incomplete node.
      AddProcessedNode<StructUnion>(entry, kind, full_name);
      return;
    }

    const auto byte_size = GetByteSize(entry);

    const Id id = AddProcessedNode<StructUnion>(
        entry, kind, full_name, byte_size, std::move(base_classes),
        std::move(methods), std::move(members));
    if (!full_name.empty()) {
      AddNamedTypeNode(id);
    }
  }

  void ProcessVariantMember(Entry& entry) {
    // TODO: Process signed discriminant values.
    auto dw_discriminant_value =
        entry.MaybeGetUnsignedConstant(DW_AT_discr_value);
    auto discriminant_value =
        dw_discriminant_value
            ? std::optional(static_cast<int64_t>(*dw_discriminant_value))
            : std::nullopt;

    auto children = entry.GetChildren();
    if (children.size() != 1) {
      Die() << "Unexpected number of children for variant member: "
            << EntryToString(entry);
    }

    auto child = children[0];
    if (child.GetTag() != DW_TAG_member) {
      Die() << "Unexpected tag for variant member child: "
            << Hex(child.GetTag()) << ", " << EntryToString(child);
    }
    if (GetDataBitOffset(child, 0, is_little_endian_binary_) != 0) {
      Die() << "Unexpected data member location for variant member: "
            << EntryToString(child);
    }

    const std::string name = GetNameOrEmpty(child);
    auto referred_type_id = GetReferredTypeId(GetReferredType(child));
    AddProcessedNode<VariantMember>(entry, name, discriminant_value,
                                    referred_type_id);
  }

  void ProcessMember(Entry& entry) {
    const auto name = GetNameOrEmpty(entry);
    auto referred_type = GetReferredType(entry);
    const Id referred_type_id = GetIdForEntry(referred_type);
    auto optional_bit_size = entry.MaybeGetUnsignedConstant(DW_AT_bit_size);
    // Member has DW_AT_bit_size if and only if it is bit field.
    // STG uses bit_size == 0 to mark that the member is not a bit field.
    Check(!optional_bit_size || *optional_bit_size > 0)
        << "DW_AT_bit_size should be a positive number";
    auto bit_size = optional_bit_size ? *optional_bit_size : 0;
    AddProcessedNode<Member>(
        entry, std::move(name), referred_type_id,
        GetDataBitOffset(entry, bit_size, is_little_endian_binary_), bit_size);
  }

  void ProcessMethod(std::vector<Id>& methods, Entry& entry) {
    Subprogram subprogram = GetSubprogram(entry);
    auto id = maker_.Add<Function>(std::move(subprogram.node));
    if (subprogram.external && subprogram.address) {
      // Only external functions with address are useful for ABI monitoring
      // TODO: cover virtual methods
      const auto new_symbol_idx = result_.symbols.size();
      result_.symbols.push_back(Types::Symbol{
          .scoped_name = GetScopedNameForSymbol(
              new_symbol_idx, subprogram.name_with_context),
          .linkage_name = subprogram.linkage_name,
          .address = *subprogram.address,
          .type_id = id});
    }
    const auto virtuality = entry.MaybeGetUnsignedConstant(DW_AT_virtuality)
                                 .value_or(DW_VIRTUALITY_none);
    if (virtuality == DW_VIRTUALITY_virtual ||
        virtuality == DW_VIRTUALITY_pure_virtual) {
      if (!subprogram.name_with_context.unscoped_name) {
        Die() << "Method " << EntryToString(entry) << " should have name";
      }
      if (subprogram.name_with_context.specification) {
        Die() << "Method " << EntryToString(entry)
              << " shouldn't have specification";
      }
      const auto vtable_offset = entry.MaybeGetVtableOffset().value_or(0);
      methods.push_back(AddProcessedNode<Method>(
          entry, subprogram.linkage_name,
          *subprogram.name_with_context.unscoped_name, vtable_offset, id));
    }
  }

  void ProcessBaseClass(Entry& entry) {
    const Id type_id = GetReferredTypeId(GetReferredType(entry));
    const auto byte_offset = entry.MaybeGetMemberByteOffset();
    if (!byte_offset) {
      Die() << "No offset found for base class " << EntryToString(entry);
    }
    const auto bit_offset = *byte_offset * 8;
    const auto virtuality = entry.MaybeGetUnsignedConstant(DW_AT_virtuality)
                                 .value_or(DW_VIRTUALITY_none);
    BaseClass::Inheritance inheritance;
    if (virtuality == DW_VIRTUALITY_none) {
      inheritance = BaseClass::Inheritance::NON_VIRTUAL;
    } else if (virtuality == DW_VIRTUALITY_virtual) {
      inheritance = BaseClass::Inheritance::VIRTUAL;
    } else {
      Die() << "Unexpected base class virtuality: " << virtuality;
    }
    AddProcessedNode<BaseClass>(entry, type_id, bit_offset, inheritance);
  }

  void ProcessArray(Entry& entry) {
    auto referred_type = GetReferredType(entry);
    Id referred_type_id = GetIdForEntry(referred_type);
    auto children = entry.GetChildren();
    // Multiple children in array describe multiple dimensions of this array.
    // For example, int[M][N] contains two children, M located in the first
    // child, N located in the second child. But in STG multidimensional arrays
    // are represented as chain of arrays: int[M][N] is array[M] of array[N] of
    // int.
    //
    // We need to chain children as types together in reversed order.
    // "referred_type_id" is updated every time to contain the top element in
    // the chain. Rightmost chldren refers to the original "referred_type_id".
    for (auto it = children.rbegin(); it != children.rend(); ++it) {
      auto& child = *it;
      // All subarrays except the first (last in the reversed order) are
      // attached to the corresponding child. First subarray (last in the
      // reversed order) is attached to the original entry itself.
      auto& entry_to_attach = (it + 1 == children.rend()) ? entry : child;
      // Update referred_type_id so next array in chain points there.
      referred_type_id = AddProcessedNode<Array>(
          entry_to_attach, GetNumberOfElements(child), referred_type_id);
    }
  }

  void ProcessEnum(Entry& entry) {
    const auto type_name = GetNameOrEmpty(entry);
    const auto full_name =
        type_name.empty() ? type_name : scope_.name + type_name;

    if (entry.GetFlag(DW_AT_declaration)) {
      // It is expected to have only name and no children in declaration.
      // However, it is not guaranteed and we should do something if we find an
      // example.
      CheckNoChildren(entry);
      AddProcessedNode<Enumeration>(entry, full_name);
      return;
    }
    const Id underlying_type_id =
        GetReferredTypeId(MaybeGetReferredType(entry));
    auto children = entry.GetChildren();
    Enumeration::Enumerators enumerators;
    enumerators.reserve(children.size());
    for (auto& child : children) {
      auto child_tag = child.GetTag();
      switch (child_tag) {
        case DW_TAG_enumerator: {
          const std::string enumerator_name = GetName(child);
          // TODO: detect signedness of underlying type and call
          // an appropriate method.
          std::optional<size_t> value_optional =
              child.MaybeGetUnsignedConstant(DW_AT_const_value);
          Check(value_optional.has_value()) << "Enumerator should have value";
          // TODO: support both uint64_t and int64_t, depending on
          // signedness of underlying type.
          enumerators.emplace_back(enumerator_name,
                                   static_cast<int64_t>(*value_optional));
          break;
        }
        case DW_TAG_subprogram:
          // STG does not support virtual methods for enums.
          Check(child.MaybeGetUnsignedConstant(DW_AT_virtuality)
                    .value_or(DW_VIRTUALITY_none) == DW_VIRTUALITY_none)
              << "Enums can not have virtual methods: " << EntryToString(child);
          ProcessFunction(child);
          break;
        default:
          Die() << "Unexpected tag for child of enum: " << Hex(child_tag)
                << ", " << EntryToString(child);
      }
    }
    if (!ShouldKeepDefinition(entry, type_name)) {
      AddProcessedNode<Enumeration>(entry, full_name);
      return;
    }
    const Id id = AddProcessedNode<Enumeration>(
        entry, full_name, underlying_type_id, std::move(enumerators));
    if (!full_name.empty()) {
      AddNamedTypeNode(id);
    }
  }

  struct VariantAndMembers {
    std::optional<Id> discriminant;
    std::vector<Id> members;
  };

  VariantAndMembers GetVariantAndMembers(Entry& entry) {
    std::vector<Id> members;
    std::optional<Id> discriminant = std::nullopt;
    auto discriminant_entry = entry.MaybeGetReference(DW_AT_discr);
    if (discriminant_entry.has_value()) {
      discriminant = GetIdForEntry(*discriminant_entry);
      ProcessMember(*discriminant_entry);
    }

    for (auto& child : entry.GetChildren()) {
      auto child_tag = child.GetTag();
      switch (child_tag) {
        case DW_TAG_member: {
          if (child.GetOffset() != discriminant_entry->GetOffset()) {
            Die() << "Encountered rogue member for variant: "
                  << EntryToString(entry);
          }
          if (!child.GetFlag(DW_AT_artificial)) {
            Die() << "Variant discriminant must be an artificial member: "
                  << EntryToString(child);
          }
          break;
        }
        case DW_TAG_variant:
          members.push_back(GetIdForEntry(child));
          ProcessVariantMember(child);
          break;
        default:
          Die() << "Unexpected tag for child of variant: " << Hex(child_tag)
                << ", " << EntryToString(child);
      }
    }
    return VariantAndMembers{.discriminant = discriminant,
                             .members = std::move(members)};
  }

  struct NameWithContext {
    std::optional<Dwarf_Off> specification;
    std::optional<std::string> unscoped_name;
    std::optional<std::string> scoped_name;
  };

  NameWithContext GetNameWithContext(Entry& entry) {
    NameWithContext result;
    // Leaf of specification tree is usually a declaration (of a function or a
    // method). Then goes definition, which references declaration by
    // DW_AT_specification. And on top we have instantiation, which references
    // definition by DW_AT_abstract_origin. In the worst case we have:
    // * instantiation
    //     >-DW_AT_abstract_origin-> definition
    //         >-DW_AT_specification-> declaration
    //
    // By using attribute integration we fold all information from definition to
    // instantiation, flattening hierarchy:
    // * instantiation + definition
    //     >-DW_AT_specification-> declaration
    // NB: DW_AT_abstract_origin attribute is also visible, but it should be
    // ignored, since we already used it during integration.
    //
    // We also need to support this case, when we don't have separate
    // declaration:
    // * instantiation +
    //     >-DW_AT_abstract_origin -> definition
    //
    // So the final algorithm is to get final DW_AT_specification through the
    // whole chain, or use DW_AT_abstract_origin if there is no
    // DW_AT_specification.
    if (auto specification = entry.MaybeGetReference(DW_AT_specification)) {
      result.specification = specification->GetOffset();
    } else if (auto abstract_origin =
                   entry.MaybeGetReference(DW_AT_abstract_origin)) {
      result.specification = abstract_origin->GetOffset();
    }
    result.unscoped_name = entry.MaybeGetDirectString(DW_AT_name);
    if (!result.unscoped_name && !result.specification) {
      // If there is no name and specification, then this entry is anonymous.
      // Anonymous entries are modelled as the empty string and not nullopt.
      // This allows us to fill and register scoped_name (also empty string) to
      // be used in references.
      result.unscoped_name = std::string();
    }
    if (result.unscoped_name) {
      result.scoped_name = scope_.name + *result.unscoped_name;
      scoped_names_.emplace_back(
          entry.GetOffset(), *result.scoped_name);
    }
    return result;
  }

  std::string GetScopedNameForSymbol(size_t symbol_idx,
                                     const NameWithContext& name) {
    // This method is designed to resolve this topology:
    //   A: specification=B
    //   B: name="foo"
    // Any other topologies are rejected:
    //   * Name and specification in one DIE: checked right below.
    //   * Chain of specifications will result in symbol referencing another
    //     specification, which will not be in scoped_names_, because "name and
    //     specification in one DIE" is rejected.
    if (name.scoped_name) {
      if (name.specification) {
        Die() << "Entry has name " << *name.scoped_name
              << " and specification " << Hex(*name.specification);
      }
      return *name.scoped_name;
    }
    if (name.specification) {
      unresolved_symbol_specifications_.emplace_back(*name.specification,
                                                     symbol_idx);
      // Name will be filled in ResolveSymbolSpecifications
      return {};
    }
    Die() << "Entry should have either name or specification";
  }

  void ProcessVariable(Entry& entry) {
    auto name_with_context = GetNameWithContext(entry);

    auto referred_type = GetReferredType(entry);
    const Id referred_type_id = GetIdForEntry(referred_type);

    if (auto address = entry.MaybeGetAddress(DW_AT_location)) {
      // Only external variables with address are useful for ABI monitoring
      const auto new_symbol_idx = result_.symbols.size();
      result_.symbols.push_back(Types::Symbol{
          .scoped_name = GetScopedNameForSymbol(
              new_symbol_idx, name_with_context),
          .linkage_name = GetLinkageName(version_, entry),
          .address = *address,
          .type_id = referred_type_id});
    }
  }

  void ProcessFunction(Entry& entry) {
    Subprogram subprogram = GetSubprogram(entry);
    const Id id = AddProcessedNode<Function>(entry, std::move(subprogram.node));
    if (subprogram.external && subprogram.address) {
      // Only external functions with address are useful for ABI monitoring
      const auto new_symbol_idx = result_.symbols.size();
      result_.symbols.push_back(Types::Symbol{
          .scoped_name = GetScopedNameForSymbol(
              new_symbol_idx, subprogram.name_with_context),
          .linkage_name = std::move(subprogram.linkage_name),
          .address = *subprogram.address,
          .type_id = id});
    }
  }

  struct Subprogram {
    Function node;
    NameWithContext name_with_context;
    std::string linkage_name;
    std::optional<Address> address;
    bool external;
  };

  Subprogram GetSubprogram(Entry& entry) {
    const Id return_type_id = GetReferredTypeId(MaybeGetReferredType(entry));

    std::vector<Id> parameters;
    for (auto& child : entry.GetChildren()) {
      auto child_tag = child.GetTag();
      switch (child_tag) {
        case DW_TAG_formal_parameter:
          parameters.push_back(GetReferredTypeId(GetReferredType(child)));
          break;
        case DW_TAG_unspecified_parameters:
          // Note: C++ allows a single ... argument specification but C does
          // not. However, "extern int foo();" (note lack of "void" in
          // parameters) in C will produce the same DWARF as "extern int
          // foo(...);" in C++.
          CheckNoChildren(child);
          parameters.push_back(variadic_id_);
          break;
        case DW_TAG_enumeration_type:
        case DW_TAG_label:
        case DW_TAG_lexical_block:
        case DW_TAG_structure_type:
        case DW_TAG_class_type:
        case DW_TAG_union_type:
        case DW_TAG_typedef:
        case DW_TAG_const_type:
        case DW_TAG_volatile_type:
        case DW_TAG_restrict_type:
        case DW_TAG_atomic_type:
        case DW_TAG_array_type:
        case DW_TAG_pointer_type:
        case DW_TAG_reference_type:
        case DW_TAG_rvalue_reference_type:
        case DW_TAG_ptr_to_member_type:
        case DW_TAG_unspecified_type:
        case DW_TAG_inlined_subroutine:
        case DW_TAG_subprogram:
        case DW_TAG_variable:
        case DW_TAG_call_site:
        case DW_TAG_GNU_call_site:
          // TODO: Do not leak local types outside this scope.
          // TODO: It would be better to not process any
          // information that is function local but there is a dangling
          // reference Clang bug.
          Process(child);
          break;
        case DW_TAG_imported_declaration:
        case DW_TAG_imported_module:
          // For now information there is useless for ABI monitoring, but we
          // need to check that there is no missing information in descendants.
          CheckNoChildren(child);
          break;
        case DW_TAG_template_type_parameter:
        case DW_TAG_template_value_parameter:
        case DW_TAG_GNU_template_template_param:
        case DW_TAG_GNU_template_parameter_pack:
          // We just skip these as neither GCC nor Clang seem to use them
          // properly (resulting in no references to such DIEs).
          break;
        case DW_TAG_GNU_formal_parameter_pack:
          // https://wiki.dwarfstd.org/C++0x_Variadic_templates.md
          //
          // As per this (rejected) proposal, GCC includes parameters as
          // children of this DIE.
          for (auto& child2 : child.GetChildren()) {
            if (child2.GetTag() == DW_TAG_formal_parameter) {
              parameters.push_back(GetReferredTypeId(GetReferredType(child2)));
            }
          }
          break;
        default:
          Die() << "Unexpected tag for child of function: " << Hex(child_tag)
                << ", " << EntryToString(child);
      }
    }

    return Subprogram{.node = Function(return_type_id, parameters),
                      .name_with_context = GetNameWithContext(entry),
                      .linkage_name = GetLinkageName(version_, entry),
                      .address = entry.MaybeGetAddress(DW_AT_low_pc),
                      .external = entry.GetFlag(DW_AT_external)};
  }

  // Allocate or get already allocated STG Id for Entry.
  Id GetIdForEntry(Entry& entry) {
    return maker_.Get(Hex(entry.GetOffset()));
  }

  // Same as GetIdForEntry, but returns "void_id_" for "unspecified" references,
  // because it is normal for DWARF (5.2 Unspecified Type Entries).
  Id GetReferredTypeId(std::optional<Entry> referred_type) {
    return referred_type ? GetIdForEntry(*referred_type) : void_id_;
  }

  // Wrapper for GetIdForEntry to allow lvalues.
  Id GetReferredTypeId(Entry referred_type) {
    return GetIdForEntry(referred_type);
  }

  // Populate Id from method above with processed Node.
  template <typename Node, typename... Args>
  Id AddProcessedNode(Entry& entry, Args&&... args) {
    return maker_.Set<Node>(Hex(entry.GetOffset()),
                            std::forward<Args>(args)...);
  }

  void AddNamedTypeNode(Id id) {
    if (scope_.named) {
      result_.named_type_ids.push_back(id);
    }
  }

  Maker<Hex<Dwarf_Off>> maker_;
  Id void_id_;
  Id variadic_id_;
  bool is_little_endian_binary_;
  const std::unique_ptr<Filter>& file_filter_;
  Types& result_;
  std::vector<std::pair<Dwarf_Off, std::string>> scoped_names_;
  std::vector<std::pair<Dwarf_Off, size_t>> unresolved_symbol_specifications_;

  // Current scope.
  Scope scope_;
  int version_;
  dwarf::Files files_;
  uint64_t language_;
};

Types Process(Dwarf* dwarf, bool is_little_endian_binary,
              const std::unique_ptr<Filter>& file_filter, Graph& graph) {
  Types result;

  if (dwarf == nullptr) {
    return result;
  }

  const Id void_id = graph.Add<Special>(Special::Kind::VOID);
  const Id variadic_id = graph.Add<Special>(Special::Kind::VARIADIC);
  // TODO: Scope Processor to compilation units?
  Processor processor(graph, void_id, variadic_id, is_little_endian_binary,
                      file_filter, result);
  for (auto& compilation_unit : GetCompilationUnits(*dwarf)) {
    // Could fetch top-level attributes like compiler here.
    processor.ProcessCompilationUnit(compilation_unit);
  }
  processor.ResolveSymbolSpecifications();

  return result;
}

}  // namespace dwarf
}  // namespace stg
