/*
 * Copyright 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "jit_memory_region.h"

#include <fcntl.h>
#include <unistd.h>

#include <android-base/unique_fd.h>
#include <log/log.h>
#include "base/bit_utils.h"  // For RoundDown, RoundUp
#include "base/globals.h"
#include "base/logging.h"  // For VLOG.
#include "base/membarrier.h"
#include "base/memfd.h"
#include "base/systrace.h"
#include "gc/allocator/art-dlmalloc.h"
#include "jit/jit_scoped_code_cache_write.h"
#include "oat/oat_quick_method_header.h"
#include "palette/palette.h"

using android::base::unique_fd;

namespace art HIDDEN {
namespace jit {

// Data cache will be half of the capacity
// Code cache will be the other half of the capacity.
// TODO: Make this adjustable. Currently must be 2. JitCodeCache relies on that.
static constexpr size_t kCodeAndDataCapacityDivider = 2;

bool JitMemoryRegion::Initialize(size_t initial_capacity,
                                 size_t max_capacity,
                                 bool rwx_memory_allowed,
                                 bool is_zygote,
                                 std::string* error_msg) {
  ScopedTrace trace(__PRETTY_FUNCTION__);

  CHECK_GE(max_capacity, initial_capacity);
  CHECK(max_capacity <= 1 * GB) << "The max supported size for JIT code cache is 1GB";
  // Align both capacities to page size, as that's the unit mspaces use.
  initial_capacity_ = RoundDown(initial_capacity, 2 * gPageSize);
  max_capacity_ = RoundDown(max_capacity, 2 * gPageSize);
  current_capacity_ = initial_capacity,
  data_end_ = initial_capacity / kCodeAndDataCapacityDivider;
  exec_end_ = initial_capacity - data_end_;

  const size_t capacity = max_capacity_;
  const size_t data_capacity = capacity / kCodeAndDataCapacityDivider;
  const size_t exec_capacity = capacity - data_capacity;

  // File descriptor enabling dual-view mapping of code section.
  unique_fd mem_fd;


  // The memory mappings we are going to create.
  MemMap data_pages;
  MemMap exec_pages;
  MemMap non_exec_pages;
  MemMap writable_data_pages;

  if (is_zygote) {
    android_errorWriteLog(0x534e4554, "200284993");  // Report to SafetyNet.
    // Because we are not going to GC code generated by the zygote, just use all available.
    current_capacity_ = max_capacity;
    mem_fd = unique_fd(CreateZygoteMemory(capacity, error_msg));
    if (mem_fd.get() < 0) {
      return false;
    }
  } else {
    // Bionic supports memfd_create, but the call may fail on older kernels.
    mem_fd = unique_fd(art::memfd_create("jit-cache", /* flags= */ 0));
    if (mem_fd.get() < 0) {
      std::ostringstream oss;
      oss << "Failed to initialize dual view JIT. memfd_create() error: " << strerror(errno);
      if (!rwx_memory_allowed) {
        // Without using RWX page permissions, the JIT can not fallback to single mapping as it
        // requires tranitioning the code pages to RWX for updates.
        *error_msg = oss.str();
        return false;
      }
      VLOG(jit) << oss.str();
    } else if (ftruncate(mem_fd, capacity) != 0) {
      std::ostringstream oss;
      oss << "Failed to initialize memory file: " << strerror(errno);
      *error_msg = oss.str();
      return false;
    }
  }

  // Map name specific for android_os_Debug.cpp accounting.
  std::string data_cache_name = is_zygote ? "zygote-data-code-cache" : "data-code-cache";
  std::string exec_cache_name = is_zygote ? "zygote-jit-code-cache" : "jit-code-cache";

  std::string error_str;
  int base_flags;
  if (mem_fd.get() >= 0) {
    // Dual view of JIT code cache case. Create an initial mapping of data pages large enough
    // for data and non-writable view of JIT code pages. We use the memory file descriptor to
    // enable dual mapping - we'll create a second mapping using the descriptor below. The
    // mappings will look like:
    //
    //       VA                  PA
    //
    //       +---------------+
    //       | non exec code |\
    //       +---------------+ \
    //       | writable data |\ \
    //       +---------------+ \ \
    //       :               :\ \ \
    //       +---------------+.\.\.+---------------+
    //       |  exec code    |  \ \|     code      |
    //       +---------------+...\.+---------------+
    //       | readonly data |    \|     data      |
    //       +---------------+.....+---------------+
    //
    // In this configuration code updates are written to the non-executable view of the code
    // cache, and the executable view of the code cache has fixed RX memory protections.
    //
    // This memory needs to be mapped shared as the code portions will have two mappings.
    //
    // Additionally, the zyzote will create a dual view of the data portion of
    // the cache. This mapping will be read-only, whereas the second mapping
    // will be writable.

    base_flags = MAP_SHARED;

    // Create the writable mappings now, so that in case of the zygote, we can
    // prevent any future writable mappings through sealing.
    if (exec_capacity > 0) {
      // For dual view, create the secondary view of code memory used for updating code. This view
      // is never executable.
      std::string name = exec_cache_name + "-rw";
      non_exec_pages = MemMap::MapFile(exec_capacity,
                                       kIsDebugBuild ? kProtR : kProtRW,
                                       base_flags,
                                       mem_fd,
                                       /* start= */ data_capacity,
                                       /* low_4GB= */ false,
                                       name.c_str(),
                                       &error_str);
      if (!non_exec_pages.IsValid()) {
        // This is unexpected.
        *error_msg = "Failed to map non-executable view of JIT code cache";
        return false;
      }
      // Create a dual view of the data cache.
      name = data_cache_name + "-rw";
      writable_data_pages = MemMap::MapFile(data_capacity,
                                            kProtRW,
                                            base_flags,
                                            mem_fd,
                                            /* start= */ 0,
                                            /* low_4GB= */ false,
                                            name.c_str(),
                                            &error_str);
      if (!writable_data_pages.IsValid()) {
        std::ostringstream oss;
        oss << "Failed to create dual data view: " << error_str;
        *error_msg = oss.str();
        return false;
      }
      if (writable_data_pages.MadviseDontFork() != 0) {
        *error_msg = "Failed to MadviseDontFork the writable data view";
        return false;
      }
      if (non_exec_pages.MadviseDontFork() != 0) {
        *error_msg = "Failed to MadviseDontFork the writable code view";
        return false;
      }
      // Now that we have created the writable and executable mappings, prevent creating any new
      // ones.
      if (is_zygote && !ProtectZygoteMemory(mem_fd.get(), error_msg)) {
        return false;
      }
    }

    // Map in low 4gb to simplify accessing root tables for x86_64.
    // We could do PC-relative addressing to avoid this problem, but that
    // would require reserving code and data area before submitting, which
    // means more windows for the code memory to be RWX.
    data_pages = MemMap::MapFile(
        data_capacity + exec_capacity,
        kProtR,
        base_flags,
        mem_fd,
        /* start= */ 0,
        /* low_4gb= */ true,
        data_cache_name.c_str(),
        &error_str);
  } else {
    // Single view of JIT code cache case. Create an initial mapping of data pages large enough
    // for data and JIT code pages. The mappings will look like:
    //
    //       VA                  PA
    //
    //       +---------------+...+---------------+
    //       |  exec code    |   |     code      |
    //       +---------------+...+---------------+
    //       |      data     |   |     data      |
    //       +---------------+...+---------------+
    //
    // In this configuration code updates are written to the executable view of the code cache,
    // and the executable view of the code cache transitions RX to RWX for the update and then
    // back to RX after the update.
    base_flags = MAP_PRIVATE | MAP_ANON;
    data_pages = MemMap::MapAnonymous(
        data_cache_name.c_str(),
        data_capacity + exec_capacity,
        kProtRW,
        /* low_4gb= */ true,
        &error_str);
  }

  if (!data_pages.IsValid()) {
    std::ostringstream oss;
    oss << "Failed to create read write cache: " << error_str << " size=" << capacity;
    *error_msg = oss.str();
    return false;
  }

  if (exec_capacity > 0) {
    uint8_t* const divider = data_pages.Begin() + data_capacity;
    // Set initial permission for executable view to catch any SELinux permission problems early
    // (for processes that cannot map WX pages). Otherwise, this region does not need to be
    // executable as there is no code in the cache yet.
    exec_pages = data_pages.RemapAtEnd(divider,
                                       exec_cache_name.c_str(),
                                       kProtRX,
                                       base_flags | MAP_FIXED,
                                       mem_fd.get(),
                                       (mem_fd.get() >= 0) ? data_capacity : 0,
                                       &error_str);
    if (!exec_pages.IsValid()) {
      std::ostringstream oss;
      oss << "Failed to create read execute code cache: " << error_str << " size=" << capacity;
      *error_msg = oss.str();
      return false;
    }
  } else {
    // Profiling only. No memory for code required.
  }

  data_pages_ = std::move(data_pages);
  exec_pages_ = std::move(exec_pages);
  non_exec_pages_ = std::move(non_exec_pages);
  writable_data_pages_ = std::move(writable_data_pages);

  VLOG(jit) << "Created JitMemoryRegion"
            << ": data_pages=" << reinterpret_cast<void*>(data_pages_.Begin())
            << ", exec_pages=" << reinterpret_cast<void*>(exec_pages_.Begin())
            << ", non_exec_pages=" << reinterpret_cast<void*>(non_exec_pages_.Begin())
            << ", writable_data_pages=" << reinterpret_cast<void*>(writable_data_pages_.Begin());

  // Now that the pages are initialized, initialize the spaces.

  // Initialize the data heap.
  data_mspace_ = create_mspace_with_base(
      HasDualDataMapping() ? writable_data_pages_.Begin() : data_pages_.Begin(),
      data_end_,
      /* locked= */ false);
  CHECK(data_mspace_ != nullptr) << "create_mspace_with_base (data) failed";

  // Allow mspace to use the full data capacity.
  // It will still only use as litle memory as possible and ask for MoreCore as needed.
  CHECK(IsAlignedParam(data_capacity, gPageSize));
  mspace_set_footprint_limit(data_mspace_, data_capacity);

  // Initialize the code heap.
  MemMap* code_heap = nullptr;
  if (non_exec_pages_.IsValid()) {
    code_heap = &non_exec_pages_;
  } else if (exec_pages_.IsValid()) {
    code_heap = &exec_pages_;
  }
  if (code_heap != nullptr) {
    // Make all pages reserved for the code heap writable. The mspace allocator, that manages the
    // heap, will take and initialize pages in create_mspace_with_base().
    {
      ScopedCodeCacheWrite scc(*this);
      exec_mspace_ = create_mspace_with_base(code_heap->Begin(), exec_end_, false /*locked*/);
    }
    CHECK(exec_mspace_ != nullptr) << "create_mspace_with_base (exec) failed";
    SetFootprintLimit(current_capacity_);
  } else {
    exec_mspace_ = nullptr;
    SetFootprintLimit(current_capacity_);
  }
  return true;
}

void JitMemoryRegion::SetFootprintLimit(size_t new_footprint) {
  size_t data_space_footprint = new_footprint / kCodeAndDataCapacityDivider;
  DCHECK(IsAlignedParam(data_space_footprint, gPageSize));
  DCHECK_EQ(data_space_footprint * kCodeAndDataCapacityDivider, new_footprint);
  if (HasCodeMapping()) {
    ScopedCodeCacheWrite scc(*this);
    mspace_set_footprint_limit(exec_mspace_, new_footprint - data_space_footprint);
  }
}

bool JitMemoryRegion::IncreaseCodeCacheCapacity() {
  if (current_capacity_ == max_capacity_) {
    return false;
  }

  // Double the capacity if we're below 1MB, or increase it by 1MB if
  // we're above.
  if (current_capacity_ < 1 * MB) {
    current_capacity_ *= 2;
  } else {
    current_capacity_ += 1 * MB;
  }
  if (current_capacity_ > max_capacity_) {
    current_capacity_ = max_capacity_;
  }

  VLOG(jit) << "Increasing code cache capacity to " << PrettySize(current_capacity_);

  SetFootprintLimit(current_capacity_);

  return true;
}

// NO_THREAD_SAFETY_ANALYSIS as this is called from mspace code, at which point the lock
// is already held.
void* JitMemoryRegion::MoreCore(const void* mspace, intptr_t increment) NO_THREAD_SAFETY_ANALYSIS {
  if (mspace == exec_mspace_) {
    CHECK(exec_mspace_ != nullptr);
    const MemMap* const code_pages = GetUpdatableCodeMapping();
    void* result = code_pages->Begin() + exec_end_;
    exec_end_ += increment;
    return result;
  } else {
    CHECK_EQ(data_mspace_, mspace);
    const MemMap* const writable_data_pages = GetWritableDataMapping();
    void* result = writable_data_pages->Begin() + data_end_;
    data_end_ += increment;
    return result;
  }
}

const uint8_t* JitMemoryRegion::CommitCode(ArrayRef<const uint8_t> reserved_code,
                                           ArrayRef<const uint8_t> code,
                                           const uint8_t* stack_map) {
  DCHECK(IsInExecSpace(reserved_code.data()));
  ScopedCodeCacheWrite scc(*this);

  size_t alignment = GetInstructionSetCodeAlignment(kRuntimeISA);
  size_t header_size = OatQuickMethodHeader::InstructionAlignedSize();
  size_t total_size = header_size + code.size();

  // Each allocation should be on its own set of cache lines.
  // `total_size` covers the OatQuickMethodHeader, the JIT generated machine code,
  // and any alignment padding.
  DCHECK_GT(total_size, header_size);
  DCHECK_LE(total_size, reserved_code.size());
  uint8_t* x_memory = const_cast<uint8_t*>(reserved_code.data());
  uint8_t* w_memory = const_cast<uint8_t*>(GetNonExecutableAddress(x_memory));
  // Ensure the header ends up at expected instruction alignment.
  DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(w_memory + header_size), alignment);
  const uint8_t* result = x_memory + header_size;

  // Write the code.
  std::copy(code.begin(), code.end(), w_memory + header_size);

  // Write the header.
  OatQuickMethodHeader* method_header =
      OatQuickMethodHeader::FromCodePointer(w_memory + header_size);
  new (method_header) OatQuickMethodHeader((stack_map != nullptr) ? result - stack_map : 0u);

  // Both instruction and data caches need flushing to the point of unification where both share
  // a common view of memory. Flushing the data cache ensures the dirty cachelines from the
  // newly added code are written out to the point of unification. Flushing the instruction
  // cache ensures the newly written code will be fetched from the point of unification before
  // use. Memory in the code cache is re-cycled as code is added and removed. The flushes
  // prevent stale code from residing in the instruction cache.
  //
  // Caches are flushed before write permission is removed because some ARMv8 Qualcomm kernels
  // may trigger a segfault if a page fault occurs when requesting a cache maintenance
  // operation. This is a kernel bug that we need to work around until affected devices
  // (e.g. Nexus 5X and 6P) stop being supported or their kernels are fixed.
  //
  // For reference, this behavior is caused by this commit:
  // https://android.googlesource.com/kernel/msm/+/3fbe6bc28a6b9939d0650f2f17eb5216c719950c
  //
  bool cache_flush_success = true;
  if (HasDualCodeMapping()) {
    // Flush d-cache for the non-executable mapping.
    cache_flush_success = FlushCpuCaches(w_memory, w_memory + total_size);
  }

  // Invalidate i-cache for the executable mapping.
  if (cache_flush_success) {
    cache_flush_success = FlushCpuCaches(x_memory, x_memory + total_size);
  }

  // If flushing the cache has failed, reject the allocation because we can't guarantee
  // correctness of the instructions present in the processor caches.
  if (!cache_flush_success) {
    PLOG(ERROR) << "Cache flush failed triggering code allocation failure";
    return nullptr;
  }

  // Ensure CPU instruction pipelines are flushed for all cores. This is necessary for
  // correctness as code may still be in instruction pipelines despite the i-cache flush. It is
  // not safe to assume that changing permissions with mprotect (RX->RWX->RX) will cause a TLB
  // shootdown (incidentally invalidating the CPU pipelines by sending an IPI to all cores to
  // notify them of the TLB invalidation). Some architectures, notably ARM and ARM64, have
  // hardware support that broadcasts TLB invalidations and so their kernels have no software
  // based TLB shootdown. The sync-core flavor of membarrier was introduced in Linux 4.16 to
  // address this (see mbarrier(2)). The membarrier here will fail on prior kernels and on
  // platforms lacking the appropriate support.
  art::membarrier(art::MembarrierCommand::kPrivateExpeditedSyncCore);

  return result;
}

static void FillRootTable(uint8_t* roots_data, const std::vector<Handle<mirror::Object>>& roots)
    REQUIRES(Locks::jit_lock_)
    REQUIRES_SHARED(Locks::mutator_lock_) {
  GcRoot<mirror::Object>* gc_roots = reinterpret_cast<GcRoot<mirror::Object>*>(roots_data);
  const uint32_t length = roots.size();
  // Put all roots in `roots_data`.
  for (uint32_t i = 0; i < length; ++i) {
    ObjPtr<mirror::Object> object = roots[i].Get();
    gc_roots[i] = GcRoot<mirror::Object>(object);
  }
  // Store the length of the table at the end. This will allow fetching it from a stack_map
  // pointer.
  reinterpret_cast<uint32_t*>(roots_data)[length] = length;
}

bool JitMemoryRegion::CommitData(ArrayRef<const uint8_t> reserved_data,
                                 const std::vector<Handle<mirror::Object>>& roots,
                                 ArrayRef<const uint8_t> stack_map) {
  DCHECK(IsInDataSpace(reserved_data.data()));
  uint8_t* roots_data = GetWritableDataAddress(reserved_data.data());
  size_t root_table_size = ComputeRootTableSize(roots.size());
  uint8_t* stack_map_data = roots_data + root_table_size;
  DCHECK_LE(root_table_size + stack_map.size(), reserved_data.size());
  FillRootTable(roots_data, roots);
  memcpy(stack_map_data, stack_map.data(), stack_map.size());
  // Flush data cache, as compiled code references literals in it.
  // TODO(oth): establish whether this is necessary.
  if (UNLIKELY(!FlushCpuCaches(roots_data, roots_data + root_table_size + stack_map.size()))) {
    VLOG(jit) << "Failed to flush data in CommitData";
    return false;
  }
  return true;
}

const uint8_t* JitMemoryRegion::AllocateCode(size_t size) {
  size_t alignment = GetInstructionSetCodeAlignment(kRuntimeISA);
  void* result = mspace_memalign(exec_mspace_, alignment, size);
  if (UNLIKELY(result == nullptr)) {
    return nullptr;
  }
  used_memory_for_code_ += mspace_usable_size(result);
  return reinterpret_cast<uint8_t*>(GetExecutableAddress(result));
}

void JitMemoryRegion::FreeCode(const uint8_t* code) {
  code = GetNonExecutableAddress(code);
  used_memory_for_code_ -= mspace_usable_size(code);
  mspace_free(exec_mspace_, const_cast<uint8_t*>(code));
}

const uint8_t* JitMemoryRegion::AllocateData(size_t data_size) {
  void* result = mspace_malloc(data_mspace_, data_size);
  if (UNLIKELY(result == nullptr)) {
    return nullptr;
  }
  used_memory_for_data_ += mspace_usable_size(result);
  return reinterpret_cast<uint8_t*>(GetNonWritableDataAddress(result));
}

void JitMemoryRegion::FreeData(const uint8_t* data) {
  FreeWritableData(GetWritableDataAddress(data));
}

void JitMemoryRegion::FreeWritableData(uint8_t* writable_data) REQUIRES(Locks::jit_lock_) {
  used_memory_for_data_ -= mspace_usable_size(writable_data);
  mspace_free(data_mspace_, writable_data);
}

#if defined(__BIONIC__) && defined(ART_TARGET)
// The code below only works on bionic on target.

int JitMemoryRegion::CreateZygoteMemory(size_t capacity, std::string* error_msg) {
  if (CacheOperationsMaySegFault()) {
    // Zygote JIT requires dual code mappings by design. We can only do this if the cache flush
    // and invalidate instructions work without raising faults.
    *error_msg = "Zygote memory only works with dual mappings";
    return -1;
  }
  /* Check if kernel support exists, otherwise fall back to ashmem */
  static const char* kRegionName = "jit-zygote-cache";
  if (art::IsSealFutureWriteSupported()) {
    int fd = art::memfd_create(kRegionName, MFD_ALLOW_SEALING);
    if (fd == -1) {
      std::ostringstream oss;
      oss << "Failed to create zygote mapping: " << strerror(errno);
      *error_msg = oss.str();
      return -1;
    }

    if (ftruncate(fd, capacity) != 0) {
      std::ostringstream oss;
      oss << "Failed to create zygote mapping: " << strerror(errno);
      *error_msg = oss.str();
      return -1;
    }

    return fd;
  }

  LOG(INFO) << "Falling back to ashmem implementation for JIT zygote mapping";

  int fd;
  palette_status_t status = PaletteAshmemCreateRegion(kRegionName, capacity, &fd);
  if (status != PALETTE_STATUS_OK) {
    CHECK_EQ(status, PALETTE_STATUS_CHECK_ERRNO);
    std::ostringstream oss;
    oss << "Failed to create zygote mapping: " << strerror(errno);
    *error_msg = oss.str();
    return -1;
  }
  return fd;
}

bool JitMemoryRegion::ProtectZygoteMemory(int fd, std::string* error_msg) {
  if (art::IsSealFutureWriteSupported()) {
    if (fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_SEAL | F_SEAL_FUTURE_WRITE)
            == -1) {
      std::ostringstream oss;
      oss << "Failed to protect zygote mapping: " << strerror(errno);
      *error_msg = oss.str();
      return false;
    }
  } else {
    palette_status_t status = PaletteAshmemSetProtRegion(fd, PROT_READ | PROT_EXEC);
    if (status != PALETTE_STATUS_OK) {
      CHECK_EQ(status, PALETTE_STATUS_CHECK_ERRNO);
      std::ostringstream oss;
      oss << "Failed to protect zygote mapping: " << strerror(errno);
      *error_msg = oss.str();
      return false;
    }
  }
  return true;
}

#else

int JitMemoryRegion::CreateZygoteMemory(size_t capacity, std::string* error_msg) {
  // To simplify host building, we don't rely on the latest memfd features.
  LOG(WARNING) << "Returning un-sealable region on non-bionic";
  static const char* kRegionName = "/jit-zygote-cache";
  int fd = art::memfd_create(kRegionName, 0);
  if (fd == -1) {
    std::ostringstream oss;
    oss << "Failed to create zygote mapping: " << strerror(errno);
    *error_msg = oss.str();
    return -1;
  }
  if (ftruncate(fd, capacity) != 0) {
    std::ostringstream oss;
    oss << "Failed to create zygote mapping: " << strerror(errno);
    *error_msg = oss.str();
    return -1;
  }
  return fd;
}

bool JitMemoryRegion::ProtectZygoteMemory([[maybe_unused]] int fd,
                                          [[maybe_unused]] std::string* error_msg) {
  return true;
}

#endif

}  // namespace jit
}  // namespace art
