// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// -*- mode: C++ -*-
//
// Copyright 2020-2022 Google LLC
//
// Licensed under the Apache License v2.0 with LLVM Exceptions (the
// "License"); you may not use this file except in compliance with the
// License.  You may obtain a copy of the License at
//
//     https://llvm.org/LICENSE.txt
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: Maria Teguiani
// Author: Giuliano Procida
// Author: Aleksei Vetrov

#include "elf_loader.h"

#include <elf.h>
#include <gelf.h>
#include <libelf.h>

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <limits>
#include <ostream>
#include <string>
#include <string_view>
#include <vector>

#include "error.h"
#include "graph.h"

namespace stg {
namespace elf {

namespace {

// Maps a raw ELF symbol type (the STT_* value extracted from st_info) to the
// corresponding SymbolTableEntry::SymbolType. Reports a fatal error via Die()
// for any value that is not one of the cases below.
SymbolTableEntry::SymbolType ParseSymbolType(unsigned char symbol_type) {
  switch (symbol_type) {
    case STT_NOTYPE:
      return SymbolTableEntry::SymbolType::NOTYPE;
    case STT_OBJECT:
      return SymbolTableEntry::SymbolType::OBJECT;
    case STT_FUNC:
      return SymbolTableEntry::SymbolType::FUNCTION;
    case STT_SECTION:
      return SymbolTableEntry::SymbolType::SECTION;
    case STT_FILE:
      return SymbolTableEntry::SymbolType::FILE;
    case STT_COMMON:
      return SymbolTableEntry::SymbolType::COMMON;
    case STT_TLS:
      return SymbolTableEntry::SymbolType::TLS;
    case STT_GNU_IFUNC:
      return SymbolTableEntry::SymbolType::GNU_IFUNC;
    default:
      // Widen before streaming so the diagnostic shows the numeric value; a
      // stream inserter formats unsigned char as a raw character.
      Die() << "Unknown ELF symbol type: "
            << static_cast<unsigned int>(symbol_type);
  }
}

// Maps a raw ELF symbol binding (the STB_* value extracted from st_info) to
// the corresponding SymbolTableEntry::Binding. Reports a fatal error via Die()
// for any value that is not one of the cases below.
SymbolTableEntry::Binding ParseSymbolBinding(unsigned char binding) {
  switch (binding) {
    case STB_LOCAL:
      return SymbolTableEntry::Binding::LOCAL;
    case STB_GLOBAL:
      return SymbolTableEntry::Binding::GLOBAL;
    case STB_WEAK:
      return SymbolTableEntry::Binding::WEAK;
    case STB_GNU_UNIQUE:
      return SymbolTableEntry::Binding::GNU_UNIQUE;
    default:
      // Widen before streaming so the diagnostic shows the numeric value; a
      // stream inserter formats unsigned char as a raw character.
      Die() << "Unknown ELF symbol binding: "
            << static_cast<unsigned int>(binding);
  }
}

// Maps a raw ELF symbol visibility (the STV_* value extracted from st_other)
// to the corresponding SymbolTableEntry::Visibility. Reports a fatal error via
// Die() for any value that is not one of the cases below.
SymbolTableEntry::Visibility ParseSymbolVisibility(unsigned char visibility) {
  switch (visibility) {
    case STV_DEFAULT:
      return SymbolTableEntry::Visibility::DEFAULT;
    case STV_INTERNAL:
      return SymbolTableEntry::Visibility::INTERNAL;
    case STV_HIDDEN:
      return SymbolTableEntry::Visibility::HIDDEN;
    case STV_PROTECTED:
      return SymbolTableEntry::Visibility::PROTECTED;
    default:
      // Widen before streaming so the diagnostic shows the numeric value; a
      // stream inserter formats unsigned char as a raw character.
      Die() << "Unknown ELF symbol visibility: "
            << static_cast<unsigned int>(visibility);
  }
}

// Classifies how a symbol's value should be interpreted, based on the section
// index stored with the symbol. The three special indexes SHN_UNDEF, SHN_ABS
// and SHN_COMMON get dedicated value types; every other index is treated as
// "relative to that section".
SymbolTableEntry::ValueType ParseSymbolValueType(Elf64_Section section_index) {
  using ValueType = SymbolTableEntry::ValueType;
  if (section_index == SHN_UNDEF) {
    return ValueType::UNDEFINED;
  }
  if (section_index == SHN_ABS) {
    return ValueType::ABSOLUTE;
  }
  if (section_index == SHN_COMMON) {
    return ValueType::COMMON;
  }
  return ValueType::RELATIVE_TO_SECTION;
}

// Returns a human-readable description of an ELF header e_type value.
//
// The parameter has the full 16-bit width of the e_type field: with a narrower
// parameter, values above 0xff would be truncated before the switch and could
// be misreported (for example 0xff00 would be described as "none").
// Values without a dedicated description are rendered as
// "unknown (type = N)" with N in decimal.
std::string ElfHeaderTypeToString(Elf64_Half elf_header_type) {
  switch (elf_header_type) {
    case ET_NONE:
      return "none";
    case ET_REL:
      return "relocatable";
    case ET_EXEC:
      return "executable";
    case ET_DYN:
      return "shared object";
    case ET_CORE:
      return "coredump";
    default:
      return "unknown (type = " + std::to_string(elf_header_type) + ')';
  }
}

// Returns a short name for the section types this file cares about (symbol
// tables and the GNU version sections). Any other type is rendered as
// "unknown (type = N)" with N in decimal.
std::string ElfSectionTypeToString(Elf64_Word elf_section_type) {
  if (elf_section_type == SHT_SYMTAB) {
    return "symtab";
  }
  if (elf_section_type == SHT_DYNSYM) {
    return "dynsym";
  }
  if (elf_section_type == SHT_GNU_verdef) {
    return "GNU_verdef";
  }
  if (elf_section_type == SHT_GNU_verneed) {
    return "GNU_verneed";
  }
  if (elf_section_type == SHT_GNU_versym) {
    return "GNU_versym";
  }
  std::string description = "unknown (type = ";
  description += std::to_string(elf_section_type);
  description += ')';
  return description;
}

// Reads the ELF header of `elf` and returns its e_machine field. Fails a Check
// if the header cannot be read.
GElf_Half GetMachine(Elf* elf) {
  GElf_Ehdr ehdr;
  const bool have_header = gelf_getehdr(elf, &ehdr) != nullptr;
  Check(have_header) << "could not get ELF header";
  return ehdr.e_machine;
}

// Normalises `entry.value` in place for machines whose symbol addresses carry
// extra, non-address bits. Entries for any other machine are left untouched.
void AdjustAddress(GElf_Half machine, SymbolTableEntry& entry) {
  using SymbolType = SymbolTableEntry::SymbolType;
  switch (machine) {
    case EM_ARM: {
      const bool is_function = entry.symbol_type == SymbolType::FUNCTION ||
                               entry.symbol_type == SymbolType::GNU_IFUNC;
      if (is_function) {
        // Clear bit zero of ARM32 addresses as per "ELF for the Arm
        // Architecture" section 5.5.3.
        // https://static.docs.arm.com/ihi0044/g/aaelf32.pdf
        entry.value &= ~1;
      }
      break;
    }
    case EM_AARCH64: {
      // Bits 56 to 63 may be tag information: overwrite them with copies of
      // bit 55.
      constexpr auto kBit55 = 1ULL << 55;
      constexpr auto kTopByte = 0xffULL << 56;
      if (entry.value & kBit55) {
        entry.value |= kTopByte;
      } else {
        entry.value &= ~kTopByte;
      }
      break;
    }
    default:
      break;
  }
}

// Walks the sections of `elf` in elf_nextscn order and collects those whose
// header satisfies `predicate`. Fails a Check if any section header cannot be
// read.
std::vector<Elf_Scn*> GetSectionsIf(
    Elf* elf, const std::function<bool(const GElf_Shdr&)>& predicate) {
  std::vector<Elf_Scn*> matches;
  for (Elf_Scn* scn = elf_nextscn(elf, nullptr); scn != nullptr;
       scn = elf_nextscn(elf, scn)) {
    GElf_Shdr shdr;
    Check(gelf_getshdr(scn, &shdr) != nullptr)
        << "could not get ELF section header";
    if (predicate(shdr)) {
      matches.push_back(scn);
    }
  }
  return matches;
}

// Returns every section of `elf` whose name, looked up in the section header
// string table, equals `name`. Sections whose name cannot be resolved are
// skipped. Fails a Check if the string table index cannot be obtained.
std::vector<Elf_Scn*> GetSectionsByName(Elf* elf, const std::string& name) {
  size_t strtab_index;
  Check(elf_getshdrstrndx(elf, &strtab_index) == 0)
      << "could not get ELF section header string table index";
  const auto has_requested_name = [&](const GElf_Shdr& header) {
    const char* candidate = elf_strptr(elf, strtab_index, header.sh_name);
    if (candidate == nullptr) {
      return false;
    }
    return name == candidate;
  };
  return GetSectionsIf(elf, has_requested_name);
}

// Returns the unique section called `name`, or nullptr if there is none.
// Fails a Check if more than one section carries that name.
Elf_Scn* MaybeGetSectionByName(Elf* elf, const std::string& name) {
  const std::vector<Elf_Scn*> matches = GetSectionsByName(elf, name);
  Check(matches.size() <= 1)
      << "multiple sections found with name '" << name << "'";
  return matches.empty() ? nullptr : matches.front();
}

// Like MaybeGetSectionByName, but a missing section is an error: fails a Check
// instead of returning nullptr.
Elf_Scn* GetSectionByName(Elf* elf, const std::string& name) {
  Elf_Scn* const found = MaybeGetSectionByName(elf, name);
  const bool exists = found != nullptr;
  Check(exists) << "no section found with name '" << name << "'";
  return found;
}

// Returns the unique section whose sh_type equals `type`, or nullptr if there
// is none. Fails a Check if more than one section has that type.
Elf_Scn* MaybeGetSectionByType(Elf* elf, Elf64_Word type) {
  const auto has_requested_type = [type](const GElf_Shdr& header) {
    return header.sh_type == type;
  };
  const std::vector<Elf_Scn*> matches = GetSectionsIf(elf, has_requested_type);
  Check(matches.size() <= 1) << "multiple sections found with type " << type;
  return matches.empty() ? nullptr : matches.front();
}

// Returns the section at position `index` in the section header table. Fails
// a Check if libelf has no section for that index.
Elf_Scn* GetSectionByIndex(Elf* elf, size_t index) {
  Elf_Scn* const found = elf_getscn(elf, index);
  const bool exists = found != nullptr;
  Check(exists) << "no section found with index " << index;
  return found;
}

// A section's header together with its data descriptor, as produced by
// GetSectionInfo. Member order matters: the struct is built by aggregate
// initialisation and unpacked with structured bindings.
struct SectionInfo {
  // Copy of the section header.
  GElf_Shdr header;
  // Pointer to the section's data descriptor.
  Elf_Data* data;
};

// Fetches the header and the first data descriptor of `section`. Fails a
// Check, naming the section index, if either cannot be obtained.
SectionInfo GetSectionInfo(Elf_Scn* section) {
  const size_t section_index = elf_ndxscn(section);
  SectionInfo info;
  Check(gelf_getshdr(section, &info.header) != nullptr)
      << "failed to read section (index = " << section_index << ") header";
  info.data = elf_getdata(section, nullptr);
  Check(info.data != nullptr)
      << "section (index = " << section_index << ") data is invalid";
  return info;
}

// Returns how many fixed-size entries the section holds, i.e. sh_size divided
// by sh_entsize (rounded down). Fails a Check if sh_entsize is zero.
size_t GetNumberOfEntries(const GElf_Shdr& section_header) {
  const auto entry_size = section_header.sh_entsize;
  const auto total_size = section_header.sh_size;
  Check(entry_size != 0)
      << "zero table entity size is unexpected for section "
      << ElfSectionTypeToString(section_header.sh_type);
  return total_size / entry_size;
}

// Returns a view over the bytes of `section` as reported by elf_rawdata.
// `name` is only used in the error message. Fails a Check if elf_rawdata
// returns no data descriptor.
std::string_view GetRawData(Elf_Scn* section, const char* name) {
  Elf_Data* const raw = elf_rawdata(section, nullptr);
  Check(raw != nullptr) << "elf_rawdata failed on section " << name;
  const char* const bytes = static_cast<const char*>(raw->d_buf);
  return std::string_view(bytes, raw->d_size);
}

// Looks up the null-terminated string at `offset` inside the string table
// section with index `section` and returns a view of it. Fails a Check if
// elf_strptr cannot resolve the string.
std::string_view GetString(Elf* elf, uint32_t section, size_t offset) {
  const char* const text = elf_strptr(elf, section, offset);
  const bool found = text != nullptr;
  Check(found) << "string was not found (section: " << section
               << ", offset: " << offset << ")";
  return std::string_view(text);
}

// Chooses the section that holds the binary's exported symbols.
//
// If only one of .symtab / .dynsym exists, that one is returned. If both
// exist, the choice depends on the ELF type and on `is_linux_kernel_binary`.
// Dies if neither table exists, or if both exist and the ELF type is not one
// of the supported ones.
Elf_Scn* GetSymbolTableSection(Elf* elf, bool is_linux_kernel_binary) {
  GElf_Ehdr ehdr;
  Check(gelf_getehdr(elf, &ehdr) != nullptr)
      << "could not get ELF header";

  Elf_Scn* const symtab = MaybeGetSectionByType(elf, SHT_SYMTAB);
  Elf_Scn* const dynsym = MaybeGetSectionByType(elf, SHT_DYNSYM);
  const bool has_symtab = symtab != nullptr;
  const bool has_dynsym = dynsym != nullptr;

  if (!has_symtab && !has_dynsym) {
    Die() << "no ELF symbol table found";
  }
  if (!(has_symtab && has_dynsym)) {
    // Exactly one table is present, so there is nothing to choose between.
    return has_symtab ? symtab : dynsym;
  }

  // Both tables are present. Relocatable ELF binaries, Linux kernel and
  // modules have their exported symbols in .symtab, all other ELF types have
  // their exported symbols in .dynsym.
  const auto type = ehdr.e_type;
  if (type == ET_REL || is_linux_kernel_binary) {
    return symtab;
  }
  if (type == ET_DYN || type == ET_EXEC) {
    return dynsym;
  }
  Die() << "unsupported ELF type: '"
        << ElfHeaderTypeToString(ehdr.e_type) << "'";
}


// Suffix that CFI symbol names end with.
constexpr std::string_view kCFISuffix{".cfi"};

// Reports whether `name` ends with kCFISuffix. A name equal to the suffix
// itself counts as a match.
bool IsCFISymbolName(std::string_view name) {
  const bool has_cfi_suffix = name.ends_with(kCFISuffix);
  return has_cfi_suffix;
}

}  // namespace

// Strips the CFI suffix from `cfi_name` and returns the remaining prefix as a
// view into the same character data. Fails a Check if the name does not end
// with the suffix.
std::string_view UnwrapCFISymbolName(std::string_view cfi_name) {
  Check(IsCFISymbolName(cfi_name))
      << "CFI symbol " << cfi_name << " doesn't end with " << kCFISuffix;
  cfi_name.remove_suffix(kCFISuffix.size());
  return cfi_name;
}

namespace {

// Reads the symbol table in `symbol_table_section` and returns one
// SymbolTableEntry per selected symbol, in table order.
//
// `cfi` selects which symbols are kept: when true, only symbols whose name has
// the CFI suffix; when false, only symbols whose name does not. Entry names
// are views of the strings returned by libelf for the table's linked string
// section (sh_link). Addresses are normalised with AdjustAddress for the
// file's machine type.
//
// Fails a Check if the entry count exceeds INT_MAX or if any symbol or its
// name cannot be read.
std::vector<SymbolTableEntry> GetSymbols(
    Elf* elf, Elf_Scn* symbol_table_section, bool cfi) {
  const auto machine = GetMachine(elf);
  const auto [symbol_table_header, symbol_table_data] =
      GetSectionInfo(symbol_table_section);
  const size_t number_of_symbols = GetNumberOfEntries(symbol_table_header);

  // GElf uses int for indexes in symbol table, prevent int overflow. The count
  // comes straight from the section header, so it is validated before it is
  // used to size the allocation below.
  Check(number_of_symbols <=
        static_cast<size_t>(std::numeric_limits<int>::max()))
      << "number of symbols exceeds INT_MAX";

  std::vector<SymbolTableEntry> result;
  // Upper bound: symbols filtered out by `cfi` leave part of it unused.
  result.reserve(number_of_symbols);

  for (size_t i = 0; i < number_of_symbols; ++i) {
    GElf_Sym symbol;
    Check(gelf_getsym(symbol_table_data, static_cast<int>(i), &symbol) !=
          nullptr)
        << "symbol (i = " << i << ") was not found";

    const auto name =
        GetString(elf, symbol_table_header.sh_link, symbol.st_name);
    if (cfi != IsCFISymbolName(name)) {
      continue;
    }
    SymbolTableEntry entry{
        .name = name,
        .value = symbol.st_value,
        .size = symbol.st_size,
        .symbol_type = ParseSymbolType(GELF_ST_TYPE(symbol.st_info)),
        .binding = ParseSymbolBinding(GELF_ST_BIND(symbol.st_info)),
        .visibility =
            ParseSymbolVisibility(GELF_ST_VISIBILITY(symbol.st_other)),
        .section_index = symbol.st_shndx,
        .value_type = ParseSymbolValueType(symbol.st_shndx),
    };
    AdjustAddress(machine, entry);
    result.push_back(entry);
  }

  return result;
}

// Heuristically decides whether `elf` is a Linux kernel binary (vmlinux or a
// kernel module) by looking for well-known section names.
bool IsLinuxKernelBinary(Elf* elf) {
  // `__ksymtab_strings` is one of many kernel-specific sections and is
  // sufficient on its own. It is present whenever a kernel binary (vmlinux or
  // a module) exports symbols via the EXPORT_SYMBOL_* macros; it holds the
  // symbol names and namespaces which form part of the ABI.
  if (MaybeGetSectionByName(elf, "__ksymtab_strings") != nullptr) {
    return true;
  }
  // A module that exports nothing through the ksymtab has no
  // `__ksymtab_strings`, but can still be recognised by `.modinfo`. As that
  // name is rather generic, `.gnu.linkonce.this_module` must be present as
  // well: kernel modules carry both sections.
  if (MaybeGetSectionByName(elf, ".modinfo") == nullptr) {
    return false;
  }
  return MaybeGetSectionByName(elf, ".gnu.linkonce.this_module") != nullptr;
}

// Reports whether the ELF header of `elf` declares a relocatable file
// (e_type == ET_REL). Fails a Check if the header cannot be read.
bool IsRelocatable(Elf* elf) {
  GElf_Ehdr ehdr;
  const bool have_header = gelf_getehdr(elf, &ehdr) != nullptr;
  Check(have_header) << "could not get ELF header";
  const bool relocatable = ehdr.e_type == ET_REL;
  return relocatable;
}

// Reports the data encoding declared in the ELF identification bytes: true
// for ELFDATA2LSB, false for ELFDATA2MSB. Fails a Check if the header cannot
// be read and dies on any other encoding value.
bool IsLittleEndianBinary(Elf* elf) {
  GElf_Ehdr elf_header;
  Check(gelf_getehdr(elf, &elf_header) != nullptr)
      << "could not get ELF header";

  switch (const auto endianness = elf_header.e_ident[EI_DATA]) {
    case ELFDATA2LSB:
      return true;
    case ELFDATA2MSB:
      return false;
    default:
      // e_ident holds bytes; widen before streaming so the diagnostic shows
      // the numeric value instead of a raw character.
      Die() << "Unsupported ELF endianness: "
            << static_cast<unsigned int>(endianness);
  }
}

}  // namespace

// Writes a human-readable name for the symbol type to `os` and returns `os`.
//
// The switch deliberately has no default label, so that compilers can still
// warn about an unhandled enumerator. A value matching none of the cases
// writes nothing; the function still returns `os` instead of flowing off the
// end of a non-void function, which would be undefined behaviour.
std::ostream& operator<<(std::ostream& os, SymbolTableEntry::SymbolType type) {
  using SymbolType = SymbolTableEntry::SymbolType;
  switch (type) {
    case SymbolType::NOTYPE:
      os << "notype";
      break;
    case SymbolType::OBJECT:
      os << "object";
      break;
    case SymbolType::FUNCTION:
      os << "function";
      break;
    case SymbolType::SECTION:
      os << "section";
      break;
    case SymbolType::FILE:
      os << "file";
      break;
    case SymbolType::COMMON:
      os << "common";
      break;
    case SymbolType::TLS:
      os << "TLS";
      break;
    case SymbolType::GNU_IFUNC:
      os << "indirect (ifunc) function";
      break;
  }
  return os;
}

// Writes a human-readable name for the symbol value type to `os` and returns
// `os`.
//
// The switch deliberately has no default label, so that compilers can still
// warn about an unhandled enumerator. A value matching none of the cases
// writes nothing; the function still returns `os` instead of flowing off the
// end of a non-void function, which would be undefined behaviour.
std::ostream& operator<<(std::ostream& os,
                         const SymbolTableEntry::ValueType type) {
  using ValueType = SymbolTableEntry::ValueType;
  switch (type) {
    case ValueType::UNDEFINED:
      os << "undefined";
      break;
    case ValueType::ABSOLUTE:
      os << "absolute";
      break;
    case ValueType::COMMON:
      os << "common";
      break;
    case ValueType::RELATIVE_TO_SECTION:
      os << "relative";
      break;
  }
  return os;
}

// Stores the address of `elf` (the handle itself is not copied) and then
// caches the binary's properties via InitializeElfInformation().
ElfLoader::ElfLoader(Elf& elf)
    : elf_(&elf) {
  InitializeElfInformation();
}

// Computes and caches the properties of the ELF file that the other member
// functions consult: whether it is a Linux kernel binary, whether it is
// relocatable, and whether it is little-endian.
void ElfLoader::InitializeElfInformation() {
  // The elf:: qualification selects the file-local helpers rather than the
  // member functions of the same name.
  is_linux_kernel_binary_ = elf::IsLinuxKernelBinary(elf_);
  is_relocatable_ = elf::IsRelocatable(elf_);
  is_little_endian_binary_ = elf::IsLittleEndianBinary(elf_);
}

std::string_view ElfLoader::GetSectionRawData(const char* name) const {
  return GetRawData(GetSectionByName(elf_, name), name);
}

std::vector<SymbolTableEntry> ElfLoader::GetElfSymbols() const {
  Elf_Scn* symbol_table_section =
      GetSymbolTableSection(elf_, is_linux_kernel_binary_);
  Check(symbol_table_section != nullptr)
      << "failed to find symbol table section";

  return GetSymbols(elf_, symbol_table_section, /* cfi = */ false);
}

std::vector<SymbolTableEntry> ElfLoader::GetCFISymbols() const {
  // CFI symbols may be only in .symtab
  Elf_Scn* symbol_table_section = MaybeGetSectionByType(elf_, SHT_SYMTAB);
  if (symbol_table_section == nullptr) {
    // It is possible for ET_DYN and ET_EXEC ELF binaries to not have .symtab,
    // because it was trimmed away. We can't determine whether there were CFI
    // symbols in the first place, so the best we can do is returning an empty
    // list.
    return {};
  }
  return GetSymbols(elf_, symbol_table_section, /* cfi = */ true);
}

// Returns the CRC associated with `symbol`.
//
// For an absolute symbol the CRC is the symbol's address, truncated to 32
// bits. For a section-relative symbol the CRC is the 32-bit value stored in
// the symbol's section at the symbol's address.
//
// Fails a Check if the binary is big-endian, if the symbol is neither absolute
// nor section-relative, if the section has no data buffer, or if the four CRC
// bytes do not lie entirely inside the section.
ElfSymbol::CRC ElfLoader::GetElfSymbolCRC(
    const SymbolTableEntry& symbol) const {
  Check(is_little_endian_binary_)
      << "CRC is not supported in big-endian binaries";
  const auto address = GetAbsoluteAddress(symbol);
  if (symbol.value_type == SymbolTableEntry::ValueType::ABSOLUTE) {
    return ElfSymbol::CRC{static_cast<uint32_t>(address)};
  }
  Check(symbol.value_type == SymbolTableEntry::ValueType::RELATIVE_TO_SECTION)
      << "CRC symbol is expected to be absolute or relative to a section";

  const auto section = GetSectionByIndex(elf_, symbol.section_index);
  const auto [header, data] = GetSectionInfo(section);
  Check(data->d_buf != nullptr) << "Section has no data buffer";

  Check(address >= header.sh_addr)
      << "CRC symbol address is below CRC section start";

  const size_t offset = address - header.sh_addr;
  // Compare against the remaining space instead of computing
  // `offset + sizeof(uint32_t)`: that sum can wrap around for a huge offset
  // and would then slip past the bounds check.
  const auto holds_crc_at_offset = [offset](uint64_t size) {
    return offset <= size && size - offset >= sizeof(uint32_t);
  };
  Check(holds_crc_at_offset(data->d_size) &&
        holds_crc_at_offset(header.sh_size))
      << "CRC symbol address is above CRC section end";

  // Copy the bytes out rather than dereferencing a casted pointer: nothing
  // guarantees that `offset` is suitably aligned for a uint32_t access.
  uint32_t crc;
  std::memcpy(&crc, static_cast<const char*>(data->d_buf) + offset,
              sizeof(crc));
  return ElfSymbol::CRC{crc};
}

// Returns the namespace string that `symbol` points at: the null-terminated
// string stored in the symbol's section at the symbol's address. The result
// is a view into the section's data buffer and excludes the terminator.
//
// Fails a Check if the symbol is not section-relative, if the section has no
// data buffer, if the address lies outside the section, or if no terminating
// null byte is found before the end of the section data.
std::string_view ElfLoader::GetElfSymbolNamespace(
    const SymbolTableEntry& symbol) const {
  Check(symbol.value_type == SymbolTableEntry::ValueType::RELATIVE_TO_SECTION)
      << "Namespace symbol is expected to be relative to a section";

  const auto section = GetSectionByIndex(elf_, symbol.section_index);
  const auto [header, data] = GetSectionInfo(section);
  Check(data->d_buf != nullptr) << "Section has no data buffer";

  const auto address = GetAbsoluteAddress(symbol);
  Check(address >= header.sh_addr)
      << "Namespace symbol address is below namespace section start";

  const size_t offset = address - header.sh_addr;
  Check(offset < data->d_size && offset < header.sh_size)
      << "Namespace symbol address is above namespace section end";

  const char* begin = static_cast<const char*>(data->d_buf) + offset;
  // Search only the bytes that remain in the buffer, using std::memchr from
  // the standard library in place of the non-standard strnlen.
  const size_t remaining = data->d_size - offset;
  const void* terminator = std::memchr(begin, '\0', remaining);
  Check(terminator != nullptr)
      << "Namespace string should be null-terminated";

  const size_t length =
      static_cast<size_t>(static_cast<const char*>(terminator) - begin);
  return {begin, length};
}

// Returns the address that `symbol` refers to.
//
// Absolute symbols yield their value unchanged. Section-relative symbols
// yield their value unchanged in non-relocatable files, and their value plus
// the section's sh_addr in relocatable files.
//
// Fails a Check if the symbol is neither absolute nor section-relative, or, in
// a relocatable file, if the section header cannot be read or the symbol does
// not lie inside its section.
size_t ElfLoader::GetAbsoluteAddress(const SymbolTableEntry& symbol) const {
  if (symbol.value_type == SymbolTableEntry::ValueType::ABSOLUTE) {
    return symbol.value;
  }
  Check(symbol.value_type == SymbolTableEntry::ValueType::RELATIVE_TO_SECTION)
      << "Only absolute and relative to sections symbols are supported";
  // In relocatable files, st_value holds a section offset for a defined symbol.
  if (is_relocatable_) {
    const auto section = GetSectionByIndex(elf_, symbol.section_index);
    GElf_Shdr header;
    Check(gelf_getshdr(section, &header) != nullptr)
        << "failed to get symbol section header";
    // Written without `symbol.value + symbol.size` because that sum can wrap
    // around for corrupt input and then wrongly pass the check.
    Check(symbol.value <= header.sh_size &&
          symbol.size <= header.sh_size - symbol.value)
        << "Symbol should be inside the section";
    return symbol.value + header.sh_addr;
  }
  // In executable and shared object files, st_value holds a virtual address.
  return symbol.value;
}

// Returns the cached result of the Linux kernel binary detection performed in
// InitializeElfInformation().
bool ElfLoader::IsLinuxKernelBinary() const {
  return is_linux_kernel_binary_;
}

// Returns the cached endianness of the binary determined in
// InitializeElfInformation(): true for little-endian.
bool ElfLoader::IsLittleEndianBinary() const {
  return is_little_endian_binary_;
}

}  // namespace elf
}  // namespace stg
