/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <inttypes.h>
#include <libgen.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <sys/utsname.h>
#include <time.h>
#include <unistd.h>
#include <chrono>
#include <filesystem>
#include <optional>
#include <set>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include <android-base/file.h>
#include <android-base/logging.h>
#include <android-base/parseint.h>
#include <android-base/stringprintf.h>
#include <android-base/strings.h>
#include <android-base/unique_fd.h>

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-parameter"
#include <llvm/Support/MemoryBuffer.h>
#pragma clang diagnostic pop

#if defined(__ANDROID__)
#include <android-base/properties.h>
#endif
#include <unwindstack/Error.h>

#include "BranchListFile.h"
#include "CallChainJoiner.h"
#include "ETMRecorder.h"
#include "IOEventLoop.h"
#include "JITDebugReader.h"
#include "MapRecordReader.h"
#include "OfflineUnwinder.h"
#include "ProbeEvents.h"
#include "RecordFilter.h"
#include "cmd_record_impl.h"
#include "command.h"
#include "environment.h"
#include "event_selection_set.h"
#include "event_type.h"
#include "kallsyms.h"
#include "read_apk.h"
#include "read_elf.h"
#include "read_symbol_map.h"
#include "record.h"
#include "record_file.h"
#include "thread_tree.h"
#include "tracing.h"
#include "utils.h"
#include "workload.h"

namespace simpleperf {
namespace {

using android::base::ParseUint;
using android::base::Realpath;

// Event type added by PrepareRecording() when the user selected no event at all.
// PrepareRecording() may substitute "cpu-clock" on some architectures when hardware
// events are not supported.
static std::string default_measured_event_type = "cpu-cycles";

// Maps a branch filter name to its PERF_SAMPLE_BRANCH_* flag. The keys are the filter
// names listed for the -j option in the help text.
// NOTE(review): presumably consulted while parsing -j; the parser is not in this part of
// the file -- confirm.
static std::unordered_map<std::string, uint64_t> branch_sampling_type_map = {
    {"u", PERF_SAMPLE_BRANCH_USER},
    {"k", PERF_SAMPLE_BRANCH_KERNEL},
    {"any", PERF_SAMPLE_BRANCH_ANY},
    {"any_call", PERF_SAMPLE_BRANCH_ANY_CALL},
    {"any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN},
    {"ind_call", PERF_SAMPLE_BRANCH_IND_CALL},
};

// Maps a clock name to its CLOCK_* id. The keys are the values documented for the
// --clockid option, except "perf", which has no entry in this table.
static std::unordered_map<std::string, int> clockid_map = {
    {"realtime", CLOCK_REALTIME},
    {"monotonic", CLOCK_MONOTONIC},
    {"monotonic_raw", CLOCK_MONOTONIC_RAW},
    {"boottime", CLOCK_BOOTTIME},
};

// The max size of records dumped by the kernel is 65535, and the dump stack size
// should be a multiple of 8, so MAX_DUMP_STACK_SIZE is 65528.
static constexpr uint32_t MAX_DUMP_STACK_SIZE = 65528;

// The max allowed pages in mapped buffer is decided by rlimit(RLIMIT_MEMLOCK).
// Here 1024 is a desired value for pages in mapped buffer. If mapped
// successfully, the buffer size = 1024 * 4K (page size) = 4M.
static constexpr size_t DESIRED_PAGES_IN_MAPPED_BUFFER = 1024;

// Cache size used by CallChainJoiner to cache call chains in memory.
static constexpr size_t DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE = 8 * kMegabyte;

// Default value of RecordCommand::aux_buffer_size_ (see --aux-buffer-size in the help text,
// which documents the default as 4M).
static constexpr size_t kDefaultAuxBufferSize = 4 * kMegabyte;

// On Pixel 3, it takes about 1ms to enable ETM, and 16-40ms to disable ETM and copy 4M ETM data.
// So make the default interval 100ms.
static constexpr uint32_t kDefaultEtmDataFlushIntervalInMs = 100;

// Timestamps marking the phases of one `record` run. Each field starts at zero and is
// filled with the value of GetSystemClock() when the corresponding phase is reached.
struct TimeStat {
  // Taken at the very beginning of RecordCommand::Run().
  uint64_t prepare_recording_time{0};
  // Taken in Run() once PrepareRecording() has succeeded.
  uint64_t start_recording_time{0};
  // Taken in DoRecording() right after the IO event loop returns.
  uint64_t stop_recording_time{0};
  // Taken in DoRecording() after the kernel buffers are synced and event files are closed.
  uint64_t finish_recording_time{0};
  // NOTE(review): not assigned in the visible part of the file; presumably set when post
  // processing ends -- confirm.
  uint64_t post_process_time{0};
};

// Picks the default size of the user-space record buffer.
//
// The sizes mirror the kernel buffer size on an 8 core system:
//   - system-wide recording: 8K pages * 4K page_size * 8 cores = 256MB;
//   - other recording:       1K pages * 4K page_size * 8 cores = 64MB.
// For non system-wide recording on devices with enough memory the buffer is raised to
// 256MB as well. This reduces the chance of cutting samples, which can cause broken
// callchains.
//
// Every path below returns a value, so the returned optional is never empty here.
std::optional<size_t> GetDefaultRecordBufferSize(bool system_wide_recording) {
  constexpr size_t kSmallBufferSize = 64 * kMegabyte;
  constexpr size_t kLargeBufferSize = 256 * kMegabyte;
  // Ideally "enough memory" would mean >= 4GB. But the memory size shown in /proc/meminfo
  // is likely to be 3.x GB on a device with 4GB memory, so "low memory" means <= 3GB.
  constexpr uint64_t kLowMemoryThreshold = 3 * kGigabyte;

  if (system_wide_recording) {
    return kLargeBufferSize;
  }
  if (GetMemorySize() <= kLowMemoryThreshold) {
    return kSmallBufferSize;
  }
  return kLargeBufferSize;
}

// Implements the `simpleperf record` command.
//
// Run() drives the whole recording: it parses options, prepares the event selection set and
// the output file (PrepareRecording), runs the IO event loop until recording stops
// (DoRecording), and then post processes the recorded data (PostProcessRecording).
class RecordCommand : public Command {
 public:
  // Registers the command name, short description and long help text, and sets defaults
  // for all options. The long help text contains one "%s" placeholder (in the
  // --user-buffer-size entry) that LongHelpString() fills in.
  RecordCommand()
      : Command(
            "record", "record sampling info in perf.data",
            // clang-format off
"Usage: simpleperf record [options] [--] [command [command-args]]\n"
"       Gather sampling information of running [command]. And -a/-p/-t option\n"
"       can be used to change target of sampling information.\n"
"       The default options are: -e cpu-cycles -f 4000 -o perf.data.\n"
"Select monitored threads:\n"
"-a     System-wide collection. Use with --exclude-perf to exclude samples for\n"
"       simpleperf process.\n"
#if defined(__ANDROID__)
"--app package_name    Profile the process of an Android application.\n"
"                      On non-rooted devices, the app must be debuggable,\n"
"                      because we use run-as to switch to the app's context.\n"
#endif
"-p pid_or_process_name_regex1,pid_or_process_name_regex2,...\n"
"                      Record events on existing processes. Processes are searched either by pid\n"
"                      or process name regex. Mutually exclusive with -a.\n"
"-t tid1,tid2,... Record events on existing threads. Mutually exclusive with -a.\n"
"\n"
"Select monitored event types:\n"
"-e event1[:modifier1],event2[:modifier2],...\n"
"             Select a list of events to record. An event can be:\n"
"               1) an event name listed in `simpleperf list`;\n"
"               2) a raw PMU event in rN format. N is a hex number.\n"
"                  For example, r1b selects event number 0x1b.\n"
"               3) a kprobe event added by --kprobe option.\n"
"             Modifiers can be added to define how the event should be\n"
"             monitored. Possible modifiers are:\n"
"                u - monitor user space events only\n"
"                k - monitor kernel space events only\n"
"--group event1[:modifier],event2[:modifier2],...\n"
"             Similar to -e option. But events specified in the same --group\n"
"             option are monitored as a group, and scheduled in and out at the\n"
"             same time.\n"
"--trace-offcpu   Generate samples when threads are scheduled off cpu.\n"
"                 Similar to \"-c 1 -e sched:sched_switch\".\n"
"--kprobe kprobe_event1,kprobe_event2,...\n"
"             Add kprobe events during recording. The kprobe_event format is in\n"
"             Documentation/trace/kprobetrace.rst in the kernel. Examples:\n"
"               'p:myprobe do_sys_openat2 $arg2:string'   - add event kprobes:myprobe\n"
"               'r:myretprobe do_sys_openat2 $retval:s64' - add event kprobes:myretprobe\n"
"--add-counter event1,event2,...     Add additional event counts in record samples. For example,\n"
"                                    we can use `-e cpu-cycles --add-counter instructions` to\n"
"                                    get samples for cpu-cycles event, while having instructions\n"
"                                    event count for each sample.\n"
"\n"
"Select monitoring options:\n"
"-f freq      Set event sample frequency. It means recording at most [freq]\n"
"             samples every second. For non-tracepoint events, the default\n"
"             option is -f 4000. A -f/-c option affects all event types\n"
"             following it until meeting another -f/-c option. For example,\n"
"             for \"-f 1000 -e cpu-cycles -c 1 -e sched:sched_switch\", cpu-cycles\n"
"             has sample freq 1000, sched:sched_switch event has sample period 1.\n"
"-c count     Set event sample period. It means recording one sample when\n"
"             [count] events happen. For tracepoint events, the default option\n"
"             is -c 1.\n"
"--call-graph fp | dwarf[,<dump_stack_size>]\n"
"             Enable call graph recording. Use frame pointer or dwarf debug\n"
"             frame as the method to parse call graph in stack.\n"
"             Default is no call graph. Default dump_stack_size with -g is 65528.\n"
"-g           Same as '--call-graph dwarf'.\n"
"--clockid clock_id      Generate timestamps of samples using selected clock.\n"
"                        Possible values are: realtime, monotonic,\n"
"                        monotonic_raw, boottime, perf. If supported, default\n"
"                        is monotonic, otherwise is perf.\n"
"--cpu cpu_item1,cpu_item2,...  Monitor events on selected cpus. cpu_item can be a number like\n"
"                               1, or a range like 0-3. A --cpu option affects all event types\n"
"                               following it until meeting another --cpu option.\n"
"--delay    time_in_ms   Wait time_in_ms milliseconds before recording samples.\n"
"--duration time_in_sec  Monitor for time_in_sec seconds instead of running\n"
"                        [command]. Here time_in_sec may be any positive\n"
"                        floating point number.\n"
"-j branch_filter1,branch_filter2,...\n"
"             Enable taken branch stack sampling. Each sample captures a series\n"
"             of consecutive taken branches.\n"
"             The following filters are defined:\n"
"                any: any type of branch\n"
"                any_call: any function call or system call\n"
"                any_ret: any function return or system call return\n"
"                ind_call: any indirect branch\n"
"                u: only when the branch target is at the user level\n"
"                k: only when the branch target is in the kernel\n"
"             This option requires at least one branch type among any, any_call,\n"
"             any_ret, ind_call.\n"
"-b           Enable taken branch stack sampling. Same as '-j any'.\n"
"-m mmap_pages   Set pages used in the kernel to cache sample data for each cpu.\n"
"                It should be a power of 2. If not set, the max possible value <= 1024\n"
"                will be used.\n"
"--user-buffer-size <buffer_size> Set buffer size in userspace to cache sample data.\n"
"                                 By default, it is %s.\n"
"--no-inherit  Don't record created child threads/processes.\n"
"--cpu-percent <percent>  Set the max percent of cpu time used for recording.\n"
"                         percent is in range [1-100], default is 25.\n"
"\n"
"--tp-filter filter_string    Set filter_string for the previous tracepoint event.\n"
"                             Format is in Documentation/trace/events.rst in the kernel.\n"
"                             An example: 'prev_comm != \"simpleperf\" && (prev_pid > 1)'.\n"
"\n"
"Dwarf unwinding options:\n"
"--post-unwind=(yes|no) If `--call-graph dwarf` option is used, then the user's\n"
"                       stack will be recorded in perf.data and unwound while\n"
"                       recording by default. Use --post-unwind=yes to switch\n"
"                       to unwind after recording.\n"
"--no-unwind   If `--call-graph dwarf` option is used, then the user's stack\n"
"              will be unwound by default. Use this option to disable the\n"
"              unwinding of the user's stack.\n"
"--no-callchain-joiner  If `--call-graph dwarf` option is used, then by default\n"
"                       callchain joiner is used to break the 64k stack limit\n"
"                       and build more complete call graphs. However, the built\n"
"                       call graphs may not be correct in all cases.\n"
"--callchain-joiner-min-matching-nodes count\n"
"               When callchain joiner is used, set the matched nodes needed to join\n"
"               callchains. The count should be >= 1. By default it is 1.\n"
"--no-cut-samples   Simpleperf uses a record buffer to cache records received from the kernel.\n"
"                   When the available space in the buffer reaches low level, the stack data in\n"
"                   samples is truncated to 1KB. When the available space reaches critical level,\n"
"                   it drops all samples. This option makes simpleperf not truncate stack data\n"
"                   when the available space reaches low level.\n"
"--keep-failed-unwinding-result        Keep reasons for failed unwinding cases\n"
"--keep-failed-unwinding-debug-info    Keep debug info for failed unwinding cases\n"
"\n"
"Sample filter options:\n"
"--exclude-perf                Exclude samples for simpleperf process.\n"
RECORD_FILTER_OPTION_HELP_MSG_FOR_RECORDING
"\n"
"Recording file options:\n"
"--no-dump-build-id        Don't dump build ids in perf.data.\n"
"--no-dump-kernel-symbols  Don't dump kernel symbols in perf.data. By default\n"
"                          kernel symbols will be dumped when needed.\n"
"--no-dump-symbols       Don't dump symbols in perf.data. By default symbols are\n"
"                        dumped in perf.data, to support reporting in another\n"
"                        environment.\n"
"-o record_file_name    Set record file name, default is perf.data.\n"
"--size-limit SIZE[K|M|G]      Stop recording after SIZE bytes of records.\n"
"                              Default is unlimited.\n"
"--symfs <dir>    Look for files with symbols relative to this directory.\n"
"                 This option is used to provide files with symbol table and\n"
"                 debug information, which are used for unwinding and dumping symbols.\n"
"--add-meta-info key=value     Add extra meta info, which will be stored in the recording file.\n"
"-z[=<compression_level>]      Compress records using zstd. compression level: 1 is the fastest,\n"
"                              22 is the greatest, 3 is the default.\n"
"\n"
"ETM recording options:\n"
"--addr-filter filter_str1,filter_str2,...\n"
"                Provide address filters for cs-etm instruction tracing.\n"
"                filter_str accepts below formats:\n"
"                  'filter  <addr-range>'  -- trace instructions in a range\n"
"                  'start <addr>'          -- start tracing when ip is <addr>\n"
"                  'stop <addr>'           -- stop tracing when ip is <addr>\n"
"                <addr-range> accepts below formats:\n"
"                  <file_path>                            -- code sections in a binary file\n"
"                  <vaddr_start>-<vaddr_end>@<file_path>  -- part of a binary file\n"
"                  <kernel_addr_start>-<kernel_addr_end>  -- part of kernel space\n"
"                <addr> accepts below formats:\n"
"                  <vaddr>@<file_path>      -- virtual addr in a binary file\n"
"                  <kernel_addr>            -- a kernel address\n"
"                Examples:\n"
"                  'filter 0x456-0x480@/system/lib/libc.so'\n"
"                  'start 0x456@/system/lib/libc.so,stop 0x480@/system/lib/libc.so'\n"
"--aux-buffer-size <buffer_size>  Set aux buffer size, only used in cs-etm event type.\n"
"                                 Need to be power of 2 and page size aligned.\n"
"                                 Used memory size is (buffer_size * (cpu_count + 1).\n"
"                                 Default is 4M.\n"
"--decode-etm                     Convert ETM data into branch lists while recording.\n"
"--binary binary_name             Used with --decode-etm to only generate data for binaries\n"
"                                 matching binary_name regex.\n"
"--record-timestamp               Generate timestamp packets in ETM stream.\n"
"--record-cycles                  Generate cycle count packets in ETM stream.\n"
"--cycle-threshold <threshold>    Set cycle count counter threshold for ETM cycle count packets.\n"
"--etm-flush-interval <interval>  Set the interval between ETM data flushes from the ETR buffer\n"
"                                 to the perf event buffer (in milliseconds). Default is 100 ms.\n"
"\n"
"Other options:\n"
"--exit-with-parent            Stop recording when the thread starting simpleperf dies.\n"
"--use-cmd-exit-code           Exit with the same exit code as the monitored cmdline.\n"
"--start_profiling_fd fd_no    After starting profiling, write \"STARTED\" to\n"
"                              <fd_no>, then close <fd_no>.\n"
"--stdio-controls-profiling    Use stdin/stdout to pause/resume profiling.\n"
#if defined(__ANDROID__)
"--in-app                      We are already running in the app's context.\n"
"--tracepoint-events file_name   Read tracepoint events from [file_name] instead of tracefs.\n"
#endif
#if 0
// Below options are only used internally and shouldn't be visible to the public.
"--out-fd <fd>    Write perf.data to a file descriptor.\n"
"--stop-signal-fd <fd>  Stop recording when fd is readable.\n"
#endif
            // clang-format on
            ),
        system_wide_collection_(false),
        branch_sampling_(0),
        fp_callchain_sampling_(false),
        dwarf_callchain_sampling_(false),
        dump_stack_size_in_dwarf_sampling_(MAX_DUMP_STACK_SIZE),
        unwind_dwarf_callchain_(true),
        post_unwind_(false),
        child_inherit_(true),
        duration_in_sec_(0),
        can_dump_kernel_symbols_(true),
        dump_symbols_(true),
        event_selection_set_(false),
        mmap_page_range_(std::make_pair(1, DESIRED_PAGES_IN_MAPPED_BUFFER)),
        record_filename_("perf.data"),
        sample_record_count_(0),
        in_app_context_(false),
        trace_offcpu_(false),
        exclude_kernel_callchain_(false),
        allow_callchain_joiner_(true),
        callchain_joiner_min_matching_nodes_(1u),
        last_record_timestamp_(0u),
        record_filter_(thread_tree_) {
    // If we run `adb shell simpleperf record xxx` and stop profiling by ctrl-c, adb closes
    // sockets connecting simpleperf. After that, simpleperf will receive SIGPIPE when writing
    // to stdout/stderr, which is a problem when we use '--app' option. So ignore SIGPIPE to
    // finish properly.
    signal(SIGPIPE, SIG_IGN);
  }

  // Returns the long help text with the default user buffer size substituted in.
  std::string LongHelpString() const override;
  // Runs the command. *exit_code is set to 1 on failure. On success it is 0, or the value
  // reported by the workload when --use-cmd-exit-code is used with a command.
  void Run(const std::vector<std::string>& args, int* exit_code) override;
  // Convenience overload: returns true when the exit code produced by Run() is 0.
  bool Run(const std::vector<std::string>& args) override {
    int exit_code;
    Run(args, &exit_code);
    return exit_code == 0;
  }

 private:
  // Top-level steps, called from Run() in this order.
  bool ParseOptions(const std::vector<std::string>& args, std::vector<std::string>* non_option_args,
                    ProbeEvents& probe_events);
  bool AdjustPerfEventLimit();
  bool PrepareRecording(Workload* workload);
  bool DoRecording(Workload* workload);
  bool PostProcessRecording(const std::vector<std::string>& args);
  // pre recording functions
  bool TraceOffCpu();
  bool SetEventSelectionFlags();
  bool CreateAndInitRecordFile();
  std::unique_ptr<RecordFileWriter> CreateRecordFile(const std::string& filename,
                                                     const EventAttrIds& attrs);
  bool DumpKernelSymbol();
  bool DumpTracingData();
  bool DumpMaps();
  bool DumpAuxTraceInfo();

  // recording functions
  // ProcessRecord is registered in PrepareRecording() as the callback for records read from
  // the mapped event buffers.
  bool ProcessRecord(Record* record);
  bool ShouldOmitRecord(Record* record);
  bool DumpMapsForRecord(Record* record);
  bool SaveRecordForPostUnwinding(Record* record);
  bool SaveRecordAfterUnwinding(Record* record);
  bool SaveRecordWithoutUnwinding(Record* record);
  // Registered in PrepareRecording() as the JITDebugReader debug info callback.
  bool ProcessJITDebugInfo(std::vector<JITDebugInfo> debug_info, bool sync_kernel_records);
  // Registered in PrepareRecording() as the read callback on fd 0 when
  // --stdio-controls-profiling is used.
  bool ProcessControlCmd(IOEventLoop* loop);
  void UpdateRecord(Record* record);
  bool UnwindRecord(SampleRecord& r);
  bool KeepFailedUnwindingResult(const SampleRecord& r, const std::vector<uint64_t>& ips,
                                 const std::vector<uint64_t>& sps);

  // post recording functions
  std::unique_ptr<RecordFileReader> MoveRecordFile(const std::string& old_filename);
  bool PostUnwindRecords();
  bool JoinCallChains();
  bool DumpAdditionalFeatures(const std::vector<std::string>& args);
  bool DumpBuildIdFeature();
  bool DumpFileFeature();
  bool DumpMetaInfoFeature(bool kernel_symbols_available);
  bool DumpDebugUnwindFeature(const std::unordered_set<Dso*>& dso_set);
  void CollectHitFileInfo(const SampleRecord& r, std::unordered_set<Dso*>* dso_set);
  bool DumpETMBranchListFeature();
  bool DumpInitMapFeature();

  // When true, PrepareRecording() monitors thread id -1 instead of specific targets and
  // uses the system-wide default record buffer size.
  bool system_wide_collection_;
  uint64_t branch_sampling_;
  bool fp_callchain_sampling_;
  bool dwarf_callchain_sampling_;
  // Defaults to MAX_DUMP_STACK_SIZE.
  uint32_t dump_stack_size_in_dwarf_sampling_;
  // When true, PrepareRecording() creates offline_unwinder_ (and callchain_joiner_ if allowed).
  bool unwind_dwarf_callchain_;
  bool post_unwind_;
  // Passed to OfflineUnwinder::Create() as its collect_stat argument.
  bool keep_failed_unwinding_result_ = false;
  bool keep_failed_unwinding_debug_info_ = false;
  std::unique_ptr<OfflineUnwinder> offline_unwinder_;
  // Must be false when add_counters_ is not empty (checked in PrepareRecording()).
  bool child_inherit_;
  // Time to wait before enabling events, in milliseconds (--delay). 0 means no delay.
  uint64_t delay_in_ms_ = 0;
  // Time after which the event loop is asked to exit, in seconds (--duration). 0 means unset.
  double duration_in_sec_;
  bool dump_build_id_ = true;
  bool can_dump_kernel_symbols_;
  bool dump_symbols_;
  // Clock name. PrepareRecording() syncs JIT debug info with records only when this is
  // "monotonic".
  std::string clockid_;
  EventSelectionSet event_selection_set_;

  // Min and max page counts passed to EventSelectionSet::MmapEventFiles().
  std::pair<size_t, size_t> mmap_page_range_;
  // User-space record buffer size (--user-buffer-size). When unset, the value of
  // GetDefaultRecordBufferSize() is used.
  std::optional<size_t> user_buffer_size_;
  size_t aux_buffer_size_ = kDefaultAuxBufferSize;

  // NOTE: record_filter_ below is constructed from thread_tree_, so thread_tree_ must stay
  // declared before it.
  ThreadTree thread_tree_;
  std::string record_filename_;
  android::base::unique_fd out_fd_;
  std::unique_ptr<RecordFileWriter> record_file_writer_;
  // When valid, the event loop exits once this fd becomes readable.
  android::base::unique_fd stop_signal_fd_;

  uint64_t sample_record_count_;
  // When valid, DoRecording() writes "STARTED" to this fd and then closes it.
  android::base::unique_fd start_profiling_fd_;
  bool stdio_controls_profiling_ = false;

  std::string app_package_name_;
  bool in_app_context_;
  bool trace_offcpu_;
  // Set in PrepareRecording() from event_selection_set_.ExcludeKernel().
  bool exclude_kernel_callchain_;
  uint64_t size_limit_in_bytes_ = 0;
  uint64_t max_sample_freq_ = DEFAULT_SAMPLE_FREQ_FOR_NONTRACEPOINT_EVENT;
  size_t cpu_time_max_percent_ = 25;

  // For CallChainJoiner
  bool allow_callchain_joiner_;
  size_t callchain_joiner_min_matching_nodes_;
  std::unique_ptr<CallChainJoiner> callchain_joiner_;
  // Passed to EventSelectionSet::MmapEventFiles() (see --no-cut-samples in the help text).
  bool allow_truncating_samples_ = true;

  std::unique_ptr<JITDebugReader> jit_debug_reader_;
  uint64_t last_record_timestamp_;  // used to insert Mmap2Records for JIT debug info
  TimeStat time_stat_;
  EventAttrWithId dumping_attr_id_;
  // In system wide recording, record if we have dumped map info for a process.
  std::unordered_set<pid_t> dumped_processes_;
  bool exclude_perf_ = false;
  RecordFilter record_filter_;

  std::optional<MapRecordReader> map_record_reader_;
  std::optional<MapRecordThread> map_record_thread_;

  std::unordered_map<std::string, std::string> extra_meta_info_;
  // When true and a workload exists, Run() takes the exit code from the workload.
  bool use_cmd_exit_code_ = false;
  std::vector<std::string> add_counters_;

  std::unique_ptr<ETMBranchListGenerator> etm_branch_list_generator_;
  std::unique_ptr<RegEx> binary_name_regex_;
  // Period of the ETM flush event added in PrepareRecording().
  std::chrono::milliseconds etm_flush_interval_{kDefaultEtmDataFlushIntervalInMs};

  size_t compression_level_ = 0;
};

// Builds the long help text. long_help_string_ contains a single "%s" placeholder (in the
// --user-buffer-size entry); it is replaced with a description of the default user buffer
// size, expressed in megabytes.
std::string RecordCommand::LongHelpString() const {
  // Default sizes in megabytes. They stay 0 if no default size is available.
  uint64_t per_process_mb = 0;
  const std::optional<size_t> per_process_bytes = GetDefaultRecordBufferSize(false);
  if (per_process_bytes.has_value()) {
    per_process_mb = *per_process_bytes / kMegabyte;
  }
  uint64_t system_wide_mb = 0;
  const std::optional<size_t> system_wide_bytes = GetDefaultRecordBufferSize(true);
  if (system_wide_bytes.has_value()) {
    system_wide_mb = *system_wide_bytes / kMegabyte;
  }

  // Print one number when both defaults agree. Otherwise print both, wrapped onto a second
  // line that is indented to line up with the option description column.
  const std::string size_description =
      (per_process_mb == system_wide_mb)
          ? android::base::StringPrintf("%" PRIu64 "M", per_process_mb)
          : android::base::StringPrintf(
                "%" PRIu64 "M for process recording and %" PRIu64
                "M\n                                 for system wide recording",
                per_process_mb, system_wide_mb);
  return android::base::StringPrintf(long_help_string_.c_str(), size_description.c_str());
}

void RecordCommand::Run(const std::vector<std::string>& args, int* exit_code) {
  *exit_code = 1;
  time_stat_.prepare_recording_time = GetSystemClock();
  ScopedCurrentArch scoped_arch(GetMachineArch());

  if (!CheckPerfEventLimit()) {
    return;
  }
  AllowMoreOpenedFiles();

  std::vector<std::string> workload_args;
  ProbeEvents probe_events(event_selection_set_);
  if (!ParseOptions(args, &workload_args, probe_events)) {
    return;
  }
  if (!AdjustPerfEventLimit()) {
    return;
  }
  std::unique_ptr<ScopedTempFiles> scoped_temp_files =
      ScopedTempFiles::Create(android::base::Dirname(record_filename_));
  if (!scoped_temp_files) {
    PLOG(ERROR) << "Can't create output file in directory "
                << android::base::Dirname(record_filename_);
    return;
  }
  if (!app_package_name_.empty() && !in_app_context_) {
    // Some users want to profile non debuggable apps on rooted devices. If we use run-as,
    // it will be impossible when using --app. So don't switch to app's context when we are
    // root.
    if (!IsRoot()) {
      // Running simpleperf in app context doesn't allow running child command. So no need to
      // consider exit code of child command here.
      *exit_code = RunInAppContext(app_package_name_, "record", args, workload_args.size(),
                                   record_filename_, true)
                       ? 0
                       : 1;
      return;
    }
  }
  std::unique_ptr<Workload> workload;
  if (!workload_args.empty()) {
    workload = Workload::CreateWorkload(workload_args);
    if (workload == nullptr) {
      return;
    }
  }
  if (!PrepareRecording(workload.get())) {
    return;
  }
  time_stat_.start_recording_time = GetSystemClock();
  if (!DoRecording(workload.get()) || !PostProcessRecording(args)) {
    return;
  }
  if (use_cmd_exit_code_ && workload) {
    workload->WaitChildProcess(false, exit_code);
  } else {
    *exit_code = 0;
  }
}

// Sets up everything needed before the IO event loop can run: the selected events, the
// monitored targets, the perf event files and their mapped buffers, the output file, and
// all events (signals, timers, fds) registered on the IO event loop.
//
// workload: the command to profile, or nullptr when no command was given. When no target
//           was selected by options, the workload process is the monitored target.
// Returns false if any step fails.
bool RecordCommand::PrepareRecording(Workload* workload) {
  // 1. Prepare in other modules.
  PrepareVdsoFile();

  // 2. Add default event type.
  if (event_selection_set_.empty()) {
    std::string event_type = default_measured_event_type;
    if (GetTargetArch() == ARCH_X86_32 || GetTargetArch() == ARCH_X86_64 ||
        GetTargetArch() == ARCH_RISCV64) {
      // Emulators may not support hardware events. So switch to cpu-clock when cpu-cycles isn't
      // available.
      if (!IsHardwareEventSupported()) {
        event_type = "cpu-clock";
        LOG(INFO) << "Hardware events are not available, switch to cpu-clock.";
      }
    }
    if (!event_selection_set_.AddEventType(event_type)) {
      return false;
    }
  }

  // 3. Process options before opening perf event files.
  exclude_kernel_callchain_ = event_selection_set_.ExcludeKernel();
  if (trace_offcpu_ && !TraceOffCpu()) {
    return false;
  }
  if (!add_counters_.empty()) {
    if (child_inherit_) {
      LOG(ERROR) << "--no-inherit is needed when using --add-counter.";
      return false;
    }
    if (!event_selection_set_.AddCounters(add_counters_)) {
      return false;
    }
  }
  if (!SetEventSelectionFlags()) {
    return false;
  }
  if (unwind_dwarf_callchain_) {
    bool collect_stat = keep_failed_unwinding_result_;
    offline_unwinder_ = OfflineUnwinder::Create(collect_stat);
  }
  if (unwind_dwarf_callchain_ && allow_callchain_joiner_) {
    callchain_joiner_.reset(new CallChainJoiner(DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE,
                                                callchain_joiner_min_matching_nodes_, false));
  }

  // 4. Add monitored targets.
  // need_to_check_targets is set when the targets are pre-existing processes/threads, so
  // recording can stop once none of them is left (see step 7).
  bool need_to_check_targets = false;
  if (system_wide_collection_) {
    event_selection_set_.AddMonitoredThreads({-1});
  } else if (!event_selection_set_.HasMonitoredTarget()) {
    if (workload != nullptr) {
      event_selection_set_.AddMonitoredProcesses({workload->GetPid()});
      event_selection_set_.SetEnableCondition(false, true);
    } else if (!app_package_name_.empty()) {
      // If app process is not created, wait for it. This allows simpleperf starts before
      // app process. In this way, we can have a better support of app start-up time profiling.
      std::set<pid_t> pids = WaitForAppProcesses(app_package_name_);
      event_selection_set_.AddMonitoredProcesses(pids);
      need_to_check_targets = true;
    } else {
      LOG(ERROR) << "No threads to monitor. Try `simpleperf help record` for help";
      return false;
    }
  } else {
    need_to_check_targets = true;
  }
  // With --delay or ETM tracing, events are enabled explicitly later in this function or
  // by the delay callback, not when the event files are opened.
  if (delay_in_ms_ != 0 || event_selection_set_.HasAuxTrace()) {
    event_selection_set_.SetEnableCondition(false, false);
  }

  // Profiling JITed/interpreted Java code is supported starting from Android P.
  // Also support profiling art interpreter on host.
  if (GetAndroidVersion() >= kAndroidVersionP || GetAndroidVersion() == 0) {
    // JIT symfiles are stored in temporary files, and are deleted after recording. But if
    // `-g --no-unwind` option is used, we want to keep symfiles to support unwinding in
    // the debug-unwind cmd.
    auto symfile_option = (dwarf_callchain_sampling_ && !unwind_dwarf_callchain_)
                              ? JITDebugReader::SymFileOption::kKeepSymFiles
                              : JITDebugReader::SymFileOption::kDropSymFiles;
    auto sync_option = (clockid_ == "monotonic") ? JITDebugReader::SyncOption::kSyncWithRecords
                                                 : JITDebugReader::SyncOption::kNoSync;
    jit_debug_reader_.reset(new JITDebugReader(record_filename_, symfile_option, sync_option));
    // To profile java code, need to dump maps containing vdex files, which are not executable.
    event_selection_set_.SetRecordNotExecutableMaps(true);
  }

  // 5. Open perf event files and create mapped buffers.
  if (!event_selection_set_.OpenEventFiles()) {
    return false;
  }
  size_t record_buffer_size = 0;
  if (user_buffer_size_.has_value()) {
    record_buffer_size = user_buffer_size_.value();
  } else {
    auto default_size = GetDefaultRecordBufferSize(system_wide_collection_);
    if (!default_size.has_value()) {
      return false;
    }
    record_buffer_size = default_size.value();
  }
  if (!event_selection_set_.MmapEventFiles(mmap_page_range_.first, mmap_page_range_.second,
                                           aux_buffer_size_, record_buffer_size,
                                           allow_truncating_samples_, exclude_perf_)) {
    return false;
  }
  auto callback = std::bind(&RecordCommand::ProcessRecord, this, std::placeholders::_1);
  if (!event_selection_set_.PrepareToReadMmapEventData(callback)) {
    return false;
  }

  // 6. Create perf.data.
  if (!CreateAndInitRecordFile()) {
    return false;
  }

  // 7. Add read/signal/periodic Events.
  if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) {
    return false;
  }
  IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
  auto exit_loop_callback = [loop]() { return loop->ExitLoop(); };
  if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM}, exit_loop_callback, IOEventHighPriority)) {
    return false;
  }

  // Only add an event for SIGHUP if we didn't inherit SIG_IGN (e.g. from nohup).
  if (!SignalIsIgnored(SIGHUP)) {
    if (!loop->AddSignalEvent(SIGHUP, exit_loop_callback, IOEventHighPriority)) {
      return false;
    }
  }
  if (stop_signal_fd_ != -1) {
    if (!loop->AddReadEvent(stop_signal_fd_, exit_loop_callback, IOEventHighPriority)) {
      return false;
    }
  }

  if (delay_in_ms_ != 0) {
    auto delay_callback = [this]() {
      if (!event_selection_set_.SetEnableEvents(true)) {
        return false;
      }
      if (!system_wide_collection_) {
        // Dump maps in case there are new maps created while delaying.
        return DumpMaps();
      }
      return true;
    };
    // Divide by 1000.0, not 1000: delay_in_ms_ is an integer, and integer division would
    // drop the sub-second part (e.g. a 500ms delay would become 0 seconds).
    if (!loop->AddOneTimeEvent(SecondToTimeval(delay_in_ms_ / 1000.0), delay_callback)) {
      return false;
    }
  }
  if (duration_in_sec_ != 0) {
    if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_), exit_loop_callback,
                                IOEventHighPriority)) {
      return false;
    }
  }
  if (stdio_controls_profiling_) {
    // fd 0 is stdin: control commands are read from it.
    if (!loop->AddReadEvent(0, [this, loop]() { return ProcessControlCmd(loop); })) {
      return false;
    }
  }
  if (jit_debug_reader_) {
    auto callback = [this](std::vector<JITDebugInfo> debug_info, bool sync_kernel_records) {
      return ProcessJITDebugInfo(std::move(debug_info), sync_kernel_records);
    };
    if (!jit_debug_reader_->RegisterDebugInfoCallback(loop, callback)) {
      return false;
    }
    if (!system_wide_collection_) {
      // Monitor the selected processes plus the processes owning the selected threads.
      std::set<pid_t> pids = event_selection_set_.GetMonitoredProcesses();
      for (pid_t tid : event_selection_set_.GetMonitoredThreads()) {
        pid_t pid;
        if (GetProcessForThread(tid, &pid)) {
          pids.insert(pid);
        }
      }
      for (pid_t pid : pids) {
        if (!jit_debug_reader_->MonitorProcess(pid)) {
          return false;
        }
      }
      if (!jit_debug_reader_->ReadAllProcesses()) {
        return false;
      }
    }
  }
  if (event_selection_set_.HasAuxTrace()) {
    // ETM events can only be enabled successfully after MmapEventFiles().
    if (delay_in_ms_ == 0 && !event_selection_set_.IsEnabledOnExec()) {
      if (!event_selection_set_.EnableETMEvents()) {
        return false;
      }
    }
    // ETM data is dumped to kernel buffer only when there is no thread traced by ETM. It happens
    // either when all monitored threads are scheduled off cpu, or when all etm perf events are
    // disabled.
    // If ETM data isn't dumped to kernel buffer in time, overflow parts will be dropped. This
    // makes less than expected data, especially in system wide recording. So add a periodic event
    // to flush etm data by temporarily disable all perf events.
    auto etm_flush = [this]() {
      return event_selection_set_.DisableETMEvents() && event_selection_set_.EnableETMEvents();
    };
    if (!loop->AddPeriodicEvent(SecondToTimeval(etm_flush_interval_.count() / 1000.0), etm_flush)) {
      return false;
    }

    if (etm_branch_list_generator_) {
      if (exclude_perf_) {
        etm_branch_list_generator_->SetExcludePid(getpid());
      }
      if (binary_name_regex_) {
        etm_branch_list_generator_->SetBinaryFilter(binary_name_regex_.get());
      }
    }
  }
  return true;
}

// Starts the workload (when one is given and not yet started), notifies interested parties that
// profiling has begun, runs the IO event loop until it exits, and finally flushes and closes the
// perf event files. Returns false as soon as any step fails.
bool RecordCommand::DoRecording(Workload* workload) {
  // While the workload runs, the event loop writes records from the mapped buffers of
  // perf_event_files to the output file.
  if (workload != nullptr) {
    if (!workload->IsStarted() && !workload->Start()) {
      return false;
    }
  }

  // Tell the holder of start_profiling_fd_ that recording has started. A write failure is only
  // logged; the fd is released in either case.
  const bool has_start_profiling_fd = start_profiling_fd_.get() != -1;
  if (has_start_profiling_fd) {
    if (!android::base::WriteStringToFd("STARTED", start_profiling_fd_)) {
      PLOG(ERROR) << "failed to write to start_profiling_fd_";
    }
    start_profiling_fd_.reset();
  }
  if (stdio_controls_profiling_) {
    printf("started\n");
    fflush(stdout);
  }

  IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
  if (!loop->RunLoop()) {
    return false;
  }
  time_stat_.stop_recording_time = GetSystemClock();

  // Disabling ETM events flushes the last ETM data.
  if (event_selection_set_.HasAuxTrace() && !event_selection_set_.DisableETMEvents()) {
    return false;
  }
  if (!event_selection_set_.SyncKernelBuffer()) {
    return false;
  }
  event_selection_set_.CloseEventFiles();
  time_stat_.finish_recording_time = GetSystemClock();

  const uint64_t elapsed_ns = time_stat_.finish_recording_time - time_stat_.start_recording_time;
  LOG(INFO) << "Recorded for " << elapsed_ns / 1e9 << " seconds. Start post processing.";
  return true;
}

// Copies the whole content of `in_filename` to `out_fd`, then removes `in_filename`.
// Returns false (leaving the input file in place) if the file can't be opened, or if a read or
// write fails. `out_fd` is owned by this function and is closed when it returns.
static bool WriteRecordDataToOutFd(const std::string& in_filename,
                                   android::base::unique_fd out_fd) {
  android::base::unique_fd in_fd(FileHelper::OpenReadOnly(in_filename));
  if (in_fd == -1) {
    PLOG(ERROR) << "Failed to open " << in_filename;
    return false;
  }

  char chunk[8192];
  ssize_t bytes_read;
  // Forward the file chunk by chunk until read() reports EOF (0) or an error (< 0).
  while ((bytes_read = TEMP_FAILURE_RETRY(read(in_fd, chunk, sizeof(chunk)))) > 0) {
    if (!android::base::WriteFully(out_fd, chunk, bytes_read)) {
      PLOG(ERROR) << "Failed to write to out_fd";
      return false;
    }
  }
  if (bytes_read < 0) {
    PLOG(ERROR) << "Failed to read " << in_filename;
    return false;
  }

  unlink(in_filename.c_str());
  return true;
}

// Finishes a recording session after the event loop has stopped: drains remaining records,
// optionally post-unwinds and joins callchains, writes the feature sections, closes the record
// file (copying it to out_fd_ when one is set), and logs a summary of recorded/lost data.
// `args` is forwarded to DumpAdditionalFeatures().
// Returns false if any of the writing steps fails; the statistics part can't fail.
bool RecordCommand::PostProcessRecording(const std::vector<std::string>& args) {
  // 1. Read records left in the buffer.
  if (!event_selection_set_.FinishReadMmapEventData()) {
    return false;
  }

  // 2. Post unwind dwarf callchain.
  if (unwind_dwarf_callchain_ && post_unwind_) {
    if (!PostUnwindRecords()) {
      return false;
    }
  }

  // 3. Optionally join Callchains.
  if (callchain_joiner_) {
    JoinCallChains();
  }

  // 4. Dump additional features, and close record file.
  if (!record_file_writer_->FinishWritingDataSection()) {
    return false;
  }
  if (!DumpAdditionalFeatures(args)) {
    return false;
  }
  if (!record_file_writer_->Close()) {
    return false;
  }
  // When an output fd is given, stream the finished record file into it. Ownership of out_fd_ is
  // moved into the helper.
  if (out_fd_ != -1 && !WriteRecordDataToOutFd(record_filename_, std::move(out_fd_))) {
    return false;
  }
  time_stat_.post_process_time = GetSystemClock();

  // 5. Show brief record result.
  // Logs compressed/original sizes and their ratio, only when the writer has a compressor.
  auto report_compression_stat = [&]() {
    if (auto compressor = record_file_writer_->GetCompressor(); compressor != nullptr) {
      uint64_t original_size = compressor->TotalInputSize();
      uint64_t compressed_size = compressor->TotalOutputSize();
      LOG(INFO) << "Record compressed: " << ReadableBytes(compressed_size) << " (original "
                << ReadableBytes(original_size) << ", ratio " << std::setprecision(2)
                << (static_cast<double>(original_size) / compressed_size) << ")";
    }
  };

  auto record_stat = event_selection_set_.GetRecordStat();
  if (event_selection_set_.HasAuxTrace()) {
    // Aux trace recording: report traced/lost aux data instead of sample counts.
    LOG(INFO) << "Aux data traced: " << ReadableCount(record_stat.aux_data_size);
    if (record_stat.lost_aux_data_size != 0) {
      LOG(INFO) << "Aux data lost in user space: " << ReadableCount(record_stat.lost_aux_data_size)
                << ", consider increasing userspace buffer size(--user-buffer-size).";
    }
    report_compression_stat();
  } else {
    // Here we report all lost records as samples. This isn't accurate. Because records like
    // MmapRecords are not samples. But It's easier for users to understand.
    size_t userspace_lost_samples =
        record_stat.userspace_lost_samples + record_stat.userspace_lost_non_samples;
    size_t lost_samples = record_stat.kernelspace_lost_records + userspace_lost_samples;

    // Build the one-line summary; the kernelspace/userspace breakdown is only appended when
    // something was lost.
    std::stringstream os;
    os << "Samples recorded: " << ReadableCount(sample_record_count_);
    if (record_stat.userspace_truncated_stack_samples > 0) {
      os << " (" << ReadableCount(record_stat.userspace_truncated_stack_samples)
         << " with truncated stacks)";
    }
    os << ". Samples lost: " << ReadableCount(lost_samples);
    if (lost_samples != 0) {
      os << " (kernelspace: " << ReadableCount(record_stat.kernelspace_lost_records)
         << ", userspace: " << ReadableCount(userspace_lost_samples) << ")";
    }
    os << ".";
    LOG(INFO) << os.str();
    report_compression_stat();

    LOG(DEBUG) << "Record stat: kernelspace_lost_records="
               << ReadableCount(record_stat.kernelspace_lost_records)
               << ", userspace_lost_samples=" << ReadableCount(record_stat.userspace_lost_samples)
               << ", userspace_lost_non_samples="
               << ReadableCount(record_stat.userspace_lost_non_samples)
               << ", userspace_truncated_stack_samples="
               << ReadableCount(record_stat.userspace_truncated_stack_samples);

    // Warn when at least 10% of (recorded + kernel-lost) records were lost in kernel space. The
    // outer check avoids dividing by zero.
    if (sample_record_count_ + record_stat.kernelspace_lost_records != 0) {
      double kernelspace_lost_percent =
          static_cast<double>(record_stat.kernelspace_lost_records) /
          (record_stat.kernelspace_lost_records + sample_record_count_);
      constexpr double KERNELSPACE_LOST_PERCENT_WARNING_BAR = 0.1;
      if (kernelspace_lost_percent >= KERNELSPACE_LOST_PERCENT_WARNING_BAR) {
        LOG(WARNING) << "Lost " << (kernelspace_lost_percent * 100)
                     << "% of samples in kernel space, "
                     << "consider increasing kernel buffer size(-m), "
                     << "or decreasing sample frequency(-f), "
                     << "or increasing sample period(-c).";
      }
    }
    // Same 10% warning for user space, counting both lost records and samples with truncated
    // stacks against the samples that were recorded completely.
    // NOTE(review): the subtraction below is unsigned; it presumably relies on truncated samples
    // being a subset of sample_record_count_ - confirm.
    size_t userspace_lost_truncated_samples =
        userspace_lost_samples + record_stat.userspace_truncated_stack_samples;
    size_t userspace_complete_samples =
        sample_record_count_ - record_stat.userspace_truncated_stack_samples;
    if (userspace_complete_samples + userspace_lost_truncated_samples != 0) {
      double userspace_lost_percent =
          static_cast<double>(userspace_lost_truncated_samples) /
          (userspace_complete_samples + userspace_lost_truncated_samples);
      constexpr double USERSPACE_LOST_PERCENT_WARNING_BAR = 0.1;
      if (userspace_lost_percent >= USERSPACE_LOST_PERCENT_WARNING_BAR) {
        LOG(WARNING) << "Lost/Truncated " << (userspace_lost_percent * 100)
                     << "% of samples in user space, "
                     << "consider increasing userspace buffer size(--user-buffer-size), "
                     << "or decreasing sample frequency(-f), "
                     << "or increasing sample period(-c).";
      }
    }
    if (callchain_joiner_) {
      callchain_joiner_->DumpStat();
    }
  }
  // Time spent in each phase; the timestamps are divided by 1e9 to be printed in seconds.
  LOG(DEBUG) << "Prepare recording time "
             << (time_stat_.start_recording_time - time_stat_.prepare_recording_time) / 1e9
             << " s, recording time "
             << (time_stat_.stop_recording_time - time_stat_.start_recording_time) / 1e9
             << " s, stop recording time "
             << (time_stat_.finish_recording_time - time_stat_.stop_recording_time) / 1e9
             << " s, post process time "
             << (time_stat_.post_process_time - time_stat_.finish_recording_time) / 1e9 << " s.";
  return true;
}

// Parses the command line of the record command.
//
// `args` is the raw argument list. Arguments that are not options (the workload command) are
// returned through `non_option_args`. `probe_events` receives kprobes requested via --kprobe and
// is asked to create probe events referenced by -e/--group.
//
// Parsing is done in two passes over the result of PreprocessOptions():
//   1. Unordered options are pulled from `options` one by one. After this pass `options` must be
//      empty (enforced by a CHECK), so every option accepted by the option formats has to be
//      pulled here.
//   2. Ordered options (-c, -f, --call-graph, --cpu, -e, -g, --group, --tp-filter) are applied
//      in command line order.
// Afterwards cross-option constraints are validated and defaults are filled in.
//
// Returns false (after logging an error) on any invalid option or option combination.
bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
                                 std::vector<std::string>* non_option_args,
                                 ProbeEvents& probe_events) {
  OptionValueMap options;
  std::vector<std::pair<OptionName, OptionValue>> ordered_options;

  if (!PreprocessOptions(args, GetRecordCmdOptionFormats(), &options, &ordered_options,
                         non_option_args)) {
    return false;
  }

  // Process options.
  system_wide_collection_ = options.PullBoolValue("-a");

  if (auto value = options.PullValue("--add-counter"); value) {
    add_counters_ = android::base::Split(value->str_value, ",");
  }

  // Each --add-meta-info takes the form key=value, with non-empty key and value.
  for (const OptionValue& value : options.PullValues("--add-meta-info")) {
    const std::string& s = value.str_value;
    auto split_pos = s.find('=');
    if (split_pos == std::string::npos || split_pos == 0 || split_pos + 1 == s.size()) {
      LOG(ERROR) << "invalid meta-info: " << s;
      return false;
    }
    extra_meta_info_[s.substr(0, split_pos)] = s.substr(split_pos + 1);
  }

  if (auto value = options.PullValue("--addr-filter"); value) {
    auto filters = ParseAddrFilterOption(value->str_value);
    if (filters.empty()) {
      return false;
    }
    event_selection_set_.SetAddrFilters(std::move(filters));
  }

  if (auto value = options.PullValue("--app"); value) {
    app_package_name_ = value->str_value;
  }

  // The aux buffer size must fit in size_t, be a power of two and a multiple of the page size.
  if (auto value = options.PullValue("--aux-buffer-size"); value) {
    uint64_t v = value->uint_value;
    if (v > std::numeric_limits<size_t>::max() || !IsPowerOfTwo(v) || v % sysconf(_SC_PAGE_SIZE)) {
      LOG(ERROR) << "invalid aux buffer size: " << v;
      return false;
    }
    aux_buffer_size_ = static_cast<size_t>(v);
  }

  if (options.PullValue("-b")) {
    branch_sampling_ = branch_sampling_type_map["any"];
  }

  if (auto value = options.PullValue("--binary"); value) {
    binary_name_regex_ = RegEx::Create(value->str_value);
    if (binary_name_regex_ == nullptr) {
      return false;
    }
  }

  if (!options.PullUintValue("--callchain-joiner-min-matching-nodes",
                             &callchain_joiner_min_matching_nodes_, 1)) {
    return false;
  }

  // "perf" is always accepted; any other clock id needs kernel support and must be a known name.
  if (auto value = options.PullValue("--clockid"); value) {
    clockid_ = value->str_value;
    if (clockid_ != "perf") {
      if (!IsSettingClockIdSupported()) {
        LOG(ERROR) << "Setting clockid is not supported by the kernel.";
        return false;
      }
      if (clockid_map.find(clockid_) == clockid_map.end()) {
        LOG(ERROR) << "Invalid clockid: " << clockid_;
        return false;
      }
    }
  }

  if (!options.PullUintValue("--cpu-percent", &cpu_time_max_percent_, 1, 100)) {
    return false;
  }

  if (options.PullBoolValue("--decode-etm")) {
    etm_branch_list_generator_ = ETMBranchListGenerator::Create(system_wide_collection_);
  }
  // An interval of 0 (or an absent option) keeps the current etm_flush_interval_.
  uint32_t interval = 0;
  if (options.PullUintValue("--etm-flush-interval", &interval) && interval != 0) {
    etm_flush_interval_ = std::chrono::milliseconds(interval);
  }

  if (options.PullBoolValue("--record-timestamp")) {
    ETMRecorder& recorder = ETMRecorder::GetInstance();
    recorder.SetRecordTimestamp(true);
  }

  if (options.PullBoolValue("--record-cycles")) {
    ETMRecorder& recorder = ETMRecorder::GetInstance();
    recorder.SetRecordCycles(true);
  }

  if (!options.PullUintValue("--delay", &delay_in_ms_)) {
    return false;
  }

  // Only touch the recorder when --cycle-threshold is actually given. PullUintValue() appears to
  // report success without writing its output when the option is absent (the other call sites
  // rely on that to keep member defaults), so going through it here would hand an uninitialized
  // local to SetCycleThreshold().
  if (auto value = options.PullValue("--cycle-threshold"); value) {
    uint64_t v = value->uint_value;
    if (v > std::numeric_limits<size_t>::max()) {
      LOG(ERROR) << "invalid cycle threshold: " << v;
      return false;
    }
    ETMRecorder& recorder = ETMRecorder::GetInstance();
    recorder.SetCycleThreshold(static_cast<size_t>(v));
  }

  if (!options.PullDoubleValue("--duration", &duration_in_sec_, 1e-9)) {
    return false;
  }

  exclude_perf_ = options.PullBoolValue("--exclude-perf");
  if (!record_filter_.ParseOptions(options)) {
    return false;
  }

  if (options.PullValue("--exit-with-parent")) {
    // Ask the kernel to send SIGHUP to this process when its parent dies.
    prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0);
  }

  in_app_context_ = options.PullBoolValue("--in-app");

  // -j takes a comma separated list of branch sampling filters, which are OR-ed together.
  for (const OptionValue& value : options.PullValues("-j")) {
    std::vector<std::string> branch_sampling_types = android::base::Split(value.str_value, ",");
    for (auto& type : branch_sampling_types) {
      auto it = branch_sampling_type_map.find(type);
      if (it == branch_sampling_type_map.end()) {
        LOG(ERROR) << "unrecognized branch sampling filter: " << type;
        return false;
      }
      branch_sampling_ |= it->second;
    }
  }
  keep_failed_unwinding_result_ = options.PullBoolValue("--keep-failed-unwinding-result");
  keep_failed_unwinding_debug_info_ = options.PullBoolValue("--keep-failed-unwinding-debug-info");
  if (keep_failed_unwinding_debug_info_) {
    // Keeping debug info implies keeping the unwinding result.
    keep_failed_unwinding_result_ = true;
  }

  for (const OptionValue& value : options.PullValues("--kprobe")) {
    std::vector<std::string> cmds = android::base::Split(value.str_value, ",");
    for (const auto& cmd : cmds) {
      if (!probe_events.AddKprobe(cmd)) {
        return false;
      }
    }
  }

  // -m fixes both ends of the mmap page range to the given power-of-two value.
  if (auto value = options.PullValue("-m"); value) {
    if (!IsPowerOfTwo(value->uint_value) ||
        value->uint_value > std::numeric_limits<size_t>::max()) {
      LOG(ERROR) << "Invalid mmap_pages: '" << value->uint_value << "'";
      return false;
    }
    mmap_page_range_.first = mmap_page_range_.second = value->uint_value;
  }

  allow_callchain_joiner_ = !options.PullBoolValue("--no-callchain-joiner");
  allow_truncating_samples_ = !options.PullBoolValue("--no-cut-samples");
  dump_build_id_ = !options.PullBoolValue("--no-dump-build-id");
  can_dump_kernel_symbols_ = !options.PullBoolValue("--no-dump-kernel-symbols");
  dump_symbols_ = !options.PullBoolValue("--no-dump-symbols");
  if (auto value = options.PullValue("--no-inherit"); value) {
    child_inherit_ = false;
  } else if (system_wide_collection_) {
    // child_inherit is used to monitor newly created threads. It isn't useful in system wide
    // collection, which monitors all threads running on selected cpus.
    child_inherit_ = false;
  }
  unwind_dwarf_callchain_ = !options.PullBoolValue("--no-unwind");

  if (auto value = options.PullValue("-o"); value) {
    record_filename_ = value->str_value;
  }

  if (auto value = options.PullValue("--out-fd"); value) {
    out_fd_.reset(static_cast<int>(value->uint_value));
  }

  if (auto strs = options.PullStringValues("-p"); !strs.empty()) {
    if (auto pids = GetPidsFromStrings(strs, true, true); pids) {
      event_selection_set_.AddMonitoredProcesses(pids.value());
    } else {
      return false;
    }
  }

  // Use explicit if statements instead of logical operators to avoid short-circuit.
  // All three spellings must be pulled so that `options` ends up empty.
  if (options.PullValue("--post-unwind")) {
    post_unwind_ = true;
  }
  if (options.PullValue("--post-unwind=yes")) {
    post_unwind_ = true;
  }
  if (options.PullValue("--post-unwind=no")) {
    post_unwind_ = false;
  }

  if (auto value = options.PullValue("--user-buffer-size"); value) {
    uint64_t v = value->uint_value;
    if (v > std::numeric_limits<size_t>::max() || v == 0) {
      LOG(ERROR) << "invalid user buffer size: " << v;
      return false;
    }
    user_buffer_size_ = static_cast<size_t>(v);
  }

  if (!options.PullUintValue("--size-limit", &size_limit_in_bytes_, 1)) {
    return false;
  }

  if (auto value = options.PullValue("--start_profiling_fd"); value) {
    start_profiling_fd_.reset(static_cast<int>(value->uint_value));
  }

  stdio_controls_profiling_ = options.PullBoolValue("--stdio-controls-profiling");

  if (auto value = options.PullValue("--stop-signal-fd"); value) {
    stop_signal_fd_.reset(static_cast<int>(value->uint_value));
  }

  if (auto value = options.PullValue("--symfs"); value) {
    if (!Dso::SetSymFsDir(value->str_value)) {
      return false;
    }
  }

  for (const OptionValue& value : options.PullValues("-t")) {
    if (auto tids = GetTidsFromString(value.str_value, true); tids) {
      event_selection_set_.AddMonitoredThreads(tids.value());
    } else {
      return false;
    }
  }

  trace_offcpu_ = options.PullBoolValue("--trace-offcpu");

  if (auto value = options.PullValue("--tracepoint-events"); value) {
    if (!EventTypeManager::Instance().ReadTracepointsFromFile(value->str_value)) {
      return false;
    }
  }
  use_cmd_exit_code_ = options.PullBoolValue("--use-cmd-exit-code");

  // -z without a value selects the default level; otherwise the level must be in [1, 22].
  if (auto value = options.PullValue("-z"); value) {
    if (value->str_value.empty()) {
      // 3 is the default compression level of zstd library, in ZSTD_defaultCLevel().
      constexpr size_t DEFAULT_COMPRESSION_LEVEL = 3;
      compression_level_ = DEFAULT_COMPRESSION_LEVEL;
    } else {
      if (!android::base::ParseUint(value->str_value, &compression_level_) ||
          compression_level_ < 1 || compression_level_ > 22) {
        LOG(ERROR) << "invalid compression level for -z: " << value->str_value;
        return false;
      }
    }
  }

  // Every unordered option must have been consumed above.
  CHECK(options.values.empty());

  // Process ordered options.
  for (const auto& pair : ordered_options) {
    const OptionName& name = pair.first;
    const OptionValue& value = pair.second;

    if (name == "-c" || name == "-f") {
      if (value.uint_value < 1) {
        LOG(ERROR) << "invalid " << name << ": " << value.uint_value;
        return false;
      }
      SampleRate rate;
      if (name == "-c") {
        rate.sample_period = value.uint_value;
      } else {
        if (value.uint_value >= INT_MAX) {
          LOG(ERROR) << "sample freq can't be bigger than INT_MAX: " << value.uint_value;
          return false;
        }
        rate.sample_freq = value.uint_value;
      }
      event_selection_set_.SetSampleRateForNewEvents(rate);

    } else if (name == "--call-graph") {
      // Accepted forms: "fp", "dwarf" and "dwarf,<dump_stack_size>".
      std::vector<std::string> strs = android::base::Split(value.str_value, ",");
      if (strs[0] == "fp") {
        fp_callchain_sampling_ = true;
        dwarf_callchain_sampling_ = false;
      } else if (strs[0] == "dwarf") {
        fp_callchain_sampling_ = false;
        dwarf_callchain_sampling_ = true;
        if (strs.size() > 1) {
          uint64_t size;
          if (!ParseUint(strs[1], &size)) {
            LOG(ERROR) << "invalid dump stack size in --call-graph option: " << strs[1];
            return false;
          }
          if ((size & 7) != 0) {
            LOG(ERROR) << "dump stack size " << size << " is not 8-byte aligned.";
            return false;
          }
          if (size >= MAX_DUMP_STACK_SIZE) {
            LOG(ERROR) << "dump stack size " << size << " is bigger than max allowed size "
                       << MAX_DUMP_STACK_SIZE << ".";
            return false;
          }
          dump_stack_size_in_dwarf_sampling_ = static_cast<uint32_t>(size);
        }
      } else {
        // Reject unknown modes instead of silently recording without callchains.
        LOG(ERROR) << "unexpected argument for --call-graph option: " << value.str_value;
        return false;
      }

    } else if (name == "--cpu") {
      if (auto cpus = GetCpusFromString(value.str_value); cpus) {
        event_selection_set_.SetCpusForNewEvents(
            std::vector<int>(cpus.value().begin(), cpus.value().end()));
      } else {
        return false;
      }
    } else if (name == "-e") {
      std::vector<std::string> event_types = android::base::Split(value.str_value, ",");
      for (auto& event_type : event_types) {
        if (!probe_events.CreateProbeEventIfNotExist(event_type)) {
          return false;
        }
        if (!event_selection_set_.AddEventType(event_type)) {
          return false;
        }
      }
    } else if (name == "-g") {
      fp_callchain_sampling_ = false;
      dwarf_callchain_sampling_ = true;
    } else if (name == "--group") {
      std::vector<std::string> event_types = android::base::Split(value.str_value, ",");
      for (const auto& event_type : event_types) {
        if (!probe_events.CreateProbeEventIfNotExist(event_type)) {
          return false;
        }
      }
      if (!event_selection_set_.AddEventGroup(event_types)) {
        return false;
      }
    } else if (name == "--tp-filter") {
      if (!event_selection_set_.SetTracepointFilter(value.str_value)) {
        return false;
      }
    } else {
      LOG(ERROR) << "unprocessed option: " << name;
      return false;
    }
  }

  // Validate option combinations and derive dependent settings.
  if (!dwarf_callchain_sampling_) {
    if (!unwind_dwarf_callchain_) {
      LOG(ERROR) << "--no-unwind is only used with `--call-graph dwarf` option.";
      return false;
    }
    unwind_dwarf_callchain_ = false;
  }
  if (post_unwind_) {
    // Post unwinding only makes sense when dwarf callchains are sampled and unwound.
    if (!dwarf_callchain_sampling_ || !unwind_dwarf_callchain_) {
      post_unwind_ = false;
    }
  }

  if (fp_callchain_sampling_) {
    if (GetTargetArch() == ARCH_ARM) {
      LOG(WARNING) << "`--callgraph fp` option doesn't work well on arm architecture, "
                   << "consider using `-g` option or profiling on aarch64 architecture.";
    }
  }

  if (system_wide_collection_ && event_selection_set_.HasMonitoredTarget()) {
    LOG(ERROR) << "Record system wide and existing processes/threads can't be "
                  "used at the same time.";
    return false;
  }

  if (system_wide_collection_ && !IsRoot()) {
    LOG(ERROR) << "System wide profiling needs root privilege.";
    return false;
  }

  if (dump_symbols_ && can_dump_kernel_symbols_) {
    // No need to dump kernel symbols as we will dump all required symbols.
    can_dump_kernel_symbols_ = false;
  }
  if (clockid_.empty()) {
    clockid_ = IsSettingClockIdSupported() ? "monotonic" : "perf";
  }
  return true;
}

// Tries to raise the kernel perf event limits (max sample frequency, perf cpu time percent and
// mlock size) to what this recording needs. Each limit is only written when its current value
// could be read and is insufficient. If any direct write fails, falls back to
// SetPerfEventLimits() on Android Q and later, unless running in app context.
// Returns the result of that fallback when it is used, and true otherwise.
bool RecordCommand::AdjustPerfEventLimit() {
  bool set_prop = false;
  // 1. Adjust max_sample_rate.
  // Zero-initialized because it is read again in the fallback below even when the getter failed;
  // with 0, std::max() then simply picks the wanted value.
  uint64_t cur_max_freq = 0;
  if (GetMaxSampleFrequency(&cur_max_freq) && cur_max_freq < max_sample_freq_ &&
      !SetMaxSampleFrequency(max_sample_freq_)) {
    set_prop = true;
  }
  // 2. Adjust perf_cpu_time_max_percent.
  size_t cur_percent = 0;
  if (GetCpuTimeMaxPercent(&cur_percent) && cur_percent != cpu_time_max_percent_ &&
      !SetCpuTimeMaxPercent(cpu_time_max_percent_)) {
    set_prop = true;
  }
  // 3. Adjust perf_event_mlock_kb.
  // Per cpu: (mmap pages + 1) pages counted as 4 KB each, plus the aux buffer when aux tracing.
  long cpus = sysconf(_SC_NPROCESSORS_CONF);
  uint64_t mlock_kb = cpus * (mmap_page_range_.second + 1) * 4;
  if (event_selection_set_.HasAuxTrace()) {
    mlock_kb += cpus * aux_buffer_size_ / 1024;
  }
  // Zero-initialized for the same reason as cur_max_freq.
  uint64_t cur_mlock_kb = 0;
  if (GetPerfEventMlockKb(&cur_mlock_kb) && cur_mlock_kb < mlock_kb &&
      !SetPerfEventMlockKb(mlock_kb)) {
    set_prop = true;
  }

  if (GetAndroidVersion() >= kAndroidVersionQ && set_prop && !in_app_context_) {
    // Never request less than what is currently configured.
    return SetPerfEventLimits(std::max(max_sample_freq_, cur_max_freq), cpu_time_max_percent_,
                              std::max(mlock_kb, cur_mlock_kb));
  }
  return true;
}

bool RecordCommand::TraceOffCpu() {
  if (FindEventTypeByName("sched:sched_switch") == nullptr) {
    LOG(ERROR) << "Can't trace off cpu because sched:sched_switch event is not available";
    return false;
  }
  for (auto& event_type : event_selection_set_.GetTracepointEvents()) {
    if (event_type->name == "sched:sched_switch") {
      LOG(ERROR) << "Trace offcpu can't be used together with sched:sched_switch event";
      return false;
    }
  }
  if (!IsDumpingRegsForTracepointEventsSupported()) {
    LOG(ERROR) << "Dumping regs for tracepoint events is not supported by the kernel";
    return false;
  }
  // --trace-offcpu option only works with one of the selected event types.
  std::set<std::string> accepted_events = {"cpu-clock", "task-clock"};
  std::vector<const EventType*> events = event_selection_set_.GetEvents();
  if (events.size() != 1 || accepted_events.find(events[0]->name) == accepted_events.end()) {
    LOG(ERROR) << "--trace-offcpu option only works with one of events "
               << android::base::Join(accepted_events, ' ');
    return false;
  }
  if (!event_selection_set_.AddEventType("sched:sched_switch", SampleRate(0, 1))) {
    return false;
  }
  if (IsSwitchRecordSupported()) {
    event_selection_set_.EnableSwitchRecord();
  }
  return true;
}

// Applies the parsed recording options (branch sampling, callchain mode, inherit, clock id) to
// the event selection set. Returns false if branch sampling or dwarf callchain sampling can't be
// enabled.
bool RecordCommand::SetEventSelectionFlags() {
  event_selection_set_.SampleIdAll();
  if (!event_selection_set_.SetBranchSampling(branch_sampling_)) {
    return false;
  }

  // Frame pointer sampling takes precedence over dwarf sampling.
  if (fp_callchain_sampling_) {
    event_selection_set_.EnableFpCallChainSampling();
  } else if (dwarf_callchain_sampling_) {
    const bool dwarf_enabled =
        event_selection_set_.EnableDwarfCallChainSampling(dump_stack_size_in_dwarf_sampling_);
    if (!dwarf_enabled) {
      return false;
    }
  }

  event_selection_set_.SetInherit(child_inherit_);

  // "perf" means the clock id is left unset on the events.
  const bool custom_clock = clockid_ != "perf";
  if (custom_clock) {
    event_selection_set_.SetClockId(clockid_map[clockid_]);
  }
  return true;
}

// Creates the output record file for the selected events, prepares the map record reader, and
// writes the initial records (kernel symbols, tracing data, maps, aux trace info).
// Returns false if the file can't be created or any initial dump fails.
bool RecordCommand::CreateAndInitRecordFile() {
  EventAttrIds attrs = event_selection_set_.GetEventAttrWithId();

  // When samples are unwound while recording, the file stores callchains rather than the raw
  // regs and stacks, so the written attrs are adjusted accordingly.
  if (unwind_dwarf_callchain_ && !post_unwind_) {
    for (auto& attr_with_id : attrs) {
      ReplaceRegAndStackWithCallChain(attr_with_id.attr);
    }
  }

  record_file_writer_ = CreateRecordFile(record_filename_, attrs);
  if (!record_file_writer_) {
    return false;
  }

  // Use first perf_event_attr and first event id to dump mmap and comm records.
  CHECK(!attrs.empty());
  dumping_attr_id_ = attrs[0];
  CHECK(!dumping_attr_id_.ids.empty());
  map_record_reader_.emplace(dumping_attr_id_.attr, dumping_attr_id_.ids[0],
                             event_selection_set_.RecordNotExecutableMaps());
  map_record_reader_->SetCallback([this](Record* r) { return ProcessRecord(r); });

  // Run the initial dumps in order, stopping at the first failure.
  if (!DumpKernelSymbol()) {
    return false;
  }
  if (!DumpTracingData()) {
    return false;
  }
  if (!DumpMaps()) {
    return false;
  }
  return DumpAuxTraceInfo();
}

std::unique_ptr<RecordFileWriter> RecordCommand::CreateRecordFile(const std::string& filename,
                                                                  const EventAttrIds& attrs) {
  std::unique_ptr<RecordFileWriter> writer = RecordFileWriter::CreateInstance(filename);
  if (!writer) {
    return nullptr;
  }
  if (compression_level_ != 0 && !writer->SetCompressionLevel(compression_level_)) {
    return nullptr;
  }
  if (!writer->WriteAttrSection(attrs)) {
    return nullptr;
  }
  return writer;
}

bool RecordCommand::DumpKernelSymbol() {
  if (can_dump_kernel_symbols_) {
    if (event_selection_set_.NeedKernelSymbol()) {
      std::string kallsyms;
      if (!LoadKernelSymbols(&kallsyms)) {
        // Symbol loading may have failed due to the lack of permissions. This
        // is not fatal, the symbols will appear as "unknown".
        return true;
      }
      KernelSymbolRecord r(kallsyms);
      if (!ProcessRecord(&r)) {
        return false;
      }
    }
  }
  return true;
}

bool RecordCommand::DumpTracingData() {
  std::vector<const EventType*> tracepoint_event_types = event_selection_set_.GetTracepointEvents();
  if (tracepoint_event_types.empty() || !CanRecordRawData() || in_app_context_) {
    return true;  // No need to dump tracing data, or can't do it.
  }
  std::vector<char> tracing_data;
  if (!GetTracingData(tracepoint_event_types, &tracing_data)) {
    return false;
  }
  TracingDataRecord record(tracing_data);
  if (!ProcessRecord(&record)) {
    return false;
  }
  return true;
}

// Dumps the initial map records. For system wide recording this is at most the kernel maps (or a
// background map record thread); otherwise it is the kernel maps plus the maps of every
// monitored process and of the processes owning monitored threads.
bool RecordCommand::DumpMaps() {
  const bool dump_kernel_maps = !event_selection_set_.ExcludeKernel();

  if (system_wide_collection_) {
    // For system wide recording:
    //   If not aux tracing, only dump kernel maps. Maps of a process is dumped when needed (the
    //   first time a sample hits that process).
    //   If aux tracing with decoding etm data, the maps are dumped by etm_branch_list_generator.
    //   If aux tracing without decoding etm data, we don't know which maps will be needed, so dump
    //   all process maps. To reduce pre recording time, we dump process maps in map record thread
    //   while recording.
    if (event_selection_set_.HasAuxTrace() && !etm_branch_list_generator_) {
      map_record_thread_.emplace(*map_record_reader_);
      return true;
    }
    return !dump_kernel_maps || map_record_reader_->ReadKernelMaps();
  }

  if (dump_kernel_maps && !map_record_reader_->ReadKernelMaps()) {
    return false;
  }

  // Map from process id to a set of thread ids in that process.
  std::unordered_map<pid_t, std::unordered_set<pid_t>> threads_by_process;
  for (pid_t monitored_pid : event_selection_set_.GetMonitoredProcesses()) {
    std::vector<pid_t> process_tids = GetThreadsInProcess(monitored_pid);
    threads_by_process[monitored_pid].insert(process_tids.begin(), process_tids.end());
  }
  for (pid_t monitored_tid : event_selection_set_.GetMonitoredThreads()) {
    pid_t owner_pid;
    if (!GetProcessForThread(monitored_tid, &owner_pid)) {
      continue;  // Threads whose process can't be found are skipped.
    }
    threads_by_process[owner_pid].insert(monitored_tid);
  }

  // Dump each process.
  for (const auto& entry : threads_by_process) {
    if (!map_record_reader_->ReadProcessMaps(entry.first, entry.second, 0)) {
      return false;
    }
  }
  return true;
}

// Central entry point for every record produced while recording. The record is updated,
// possibly dropped, and finally saved with the unwinding strategy selected by the options.
// Returns false on failure. Returning true doesn't imply the record was written: omitted,
// filtered and consumed records also return true.
bool RecordCommand::ProcessRecord(Record* record) {
  UpdateRecord(record);
  if (ShouldOmitRecord(record)) {
    return true;
  }

  // Once the data section has grown past the size limit, ask the event loop to exit instead of
  // saving more records.
  if (size_limit_in_bytes_ > 0u &&
      size_limit_in_bytes_ < record_file_writer_->GetDataSectionSize()) {
    return event_selection_set_.GetIOEventLoop()->ExitLoop();
  }

  if (jit_debug_reader_) {
    if (!jit_debug_reader_->UpdateRecord(record)) {
      return false;
    }
  }
  last_record_timestamp_ = std::max(last_record_timestamp_, record->Timestamp());

  // In system wide recording, maps are dumped when they are needed by records.
  if (system_wide_collection_) {
    if (!DumpMapsForRecord(record)) {
      return false;
    }
  }

  // Record filter check should go after DumpMapsForRecord(). Otherwise, process/thread name
  // filters don't work in system wide collection.
  const bool is_sample = record->type() == PERF_RECORD_SAMPLE;
  if (is_sample && !record_filter_.Check(static_cast<SampleRecord&>(*record))) {
    return true;
  }

  // The ETM branch list generator may consume the record, in which case it isn't saved.
  if (etm_branch_list_generator_) {
    bool consumed = false;
    if (!etm_branch_list_generator_->ProcessRecord(*record, consumed)) {
      return false;
    }
    if (consumed) {
      return true;
    }
  }

  if (!unwind_dwarf_callchain_) {
    return SaveRecordWithoutUnwinding(record);
  }
  return post_unwind_ ? SaveRecordForPostUnwinding(record) : SaveRecordAfterUnwinding(record);
}

bool RecordCommand::DumpAuxTraceInfo() {
  if (event_selection_set_.HasAuxTrace()) {
    AuxTraceInfoRecord auxtrace_info = ETMRecorder::GetInstance().CreateAuxTraceInfoRecord();
    return ProcessRecord(&auxtrace_info);
  }
  return true;
}

// Returns true when `record` is a user space map whose file name passes
// MappedFileOnlyExistInMemory(). Kernel maps always return false.
template <typename MmapRecordType>
bool MapOnlyExistInMemory(MmapRecordType* record) {
  if (record->InKernel()) {
    return false;
  }
  return MappedFileOnlyExistInMemory(record->filename);
}

// Decides whether `record` should be dropped before any further processing. Only mmap records
// are ever omitted, and only while a JIT debug reader is active.
bool RecordCommand::ShouldOmitRecord(Record* record) {
  if (!jit_debug_reader_) {
    return false;
  }
  // To profile jitted Java code, we need PROT_JIT_SYMFILE_MAP maps not overlapped by maps for
  // [anon:dalvik-jit-code-cache]. To profile interpreted Java code, we record maps that
  // are not executable. Some non-exec maps (like those for stack, heap) provide misleading map
  // entries for unwinding, as in http://b/77236599. So it is better to remove
  // dalvik-jit-code-cache and other maps that only exist in memory.
  const auto record_type = record->type();
  if (record_type == PERF_RECORD_MMAP) {
    return MapOnlyExistInMemory(static_cast<MmapRecord*>(record));
  }
  if (record_type == PERF_RECORD_MMAP2) {
    return MapOnlyExistInMemory(static_cast<Mmap2Record*>(record));
  }
  return false;
}

// For a sample record, makes sure the maps of the sampled process have been dumped, reading them
// the first time the process is seen. Other record types are ignored.
// Returns false if reading the process maps fails.
bool RecordCommand::DumpMapsForRecord(Record* record) {
  if (record->type() != PERF_RECORD_SAMPLE) {
    return true;
  }
  const pid_t sample_pid = static_cast<SampleRecord*>(record)->tid_data.pid;
  if (dumped_processes_.count(sample_pid) != 0) {
    return true;  // Already dumped.
  }
  // Dump map info and all thread names for that process.
  if (!map_record_reader_->ReadProcessMaps(sample_pid, last_record_timestamp_)) {
    return false;
  }
  dumped_processes_.insert(sample_pid);
  return true;
}

// Writes `record` to the record file as is, leaving unwinding to the post processing step.
// Returns false if the write fails. Records kept for post unwinding still carry their stack
// data, so the logged hint points the user at turning post unwinding off.
bool RecordCommand::SaveRecordForPostUnwinding(Record* record) {
  if (!record_file_writer_->WriteRecord(*record)) {
    // The option parser accepts --post-unwind, --post-unwind=yes and --post-unwind=no, so the
    // hint names the spelling that actually disables post unwinding.
    LOG(ERROR) << "If there isn't enough space for storing profiling data, consider using "
               << "--post-unwind=no option.";
    return false;
  }
  return true;
}

// Saves a record after unwinding it. Sample records are unwound (and possibly skipped) before
// being written; all other records update the thread tree and are written as is.
// Returns false when unwinding or writing fails.
bool RecordCommand::SaveRecordAfterUnwinding(Record* record) {
  if (record->type() != PERF_RECORD_SAMPLE) {
    thread_tree_.Update(*record);
    return record_file_writer_->WriteRecord(*record);
  }

  SampleRecord& sample = *static_cast<SampleRecord*>(record);
  // AdjustCallChainGeneratedByKernel() should go before UnwindRecord(). Because we don't want
  // to adjust callchains generated by dwarf unwinder.
  sample.AdjustCallChainGeneratedByKernel();
  if (!UnwindRecord(sample)) {
    return false;
  }
  // ExcludeKernelCallChain() should go after UnwindRecord() to notice the generated user call
  // chain.
  const bool no_user_callchain =
      sample.InKernel() && exclude_kernel_callchain_ && !sample.ExcludeKernelCallChain();
  if (no_user_callchain) {
    // If current record contains no user callchain, skip it.
    return true;
  }
  sample_record_count_++;
  return record_file_writer_->WriteRecord(*record);
}

// Saves a record without unwinding it. Sample records may have their kernel-generated call
// chain adjusted and may be skipped; every other record is written unchanged.
bool RecordCommand::SaveRecordWithoutUnwinding(Record* record) {
  if (record->type() != PERF_RECORD_SAMPLE) {
    return record_file_writer_->WriteRecord(*record);
  }

  SampleRecord& sample = *static_cast<SampleRecord*>(record);
  const bool has_callchain_sampling = fp_callchain_sampling_ || dwarf_callchain_sampling_;
  if (has_callchain_sampling) {
    sample.AdjustCallChainGeneratedByKernel();
  }
  const bool no_user_callchain =
      sample.InKernel() && exclude_kernel_callchain_ && !sample.ExcludeKernelCallChain();
  if (no_user_callchain) {
    // If current record contains no user callchain, skip it.
    return true;
  }
  sample_record_count_++;
  return record_file_writer_->WriteRecord(*record);
}

bool RecordCommand::ProcessJITDebugInfo(std::vector<JITDebugInfo> debug_info,
                                        bool sync_kernel_records) {
  for (auto& info : debug_info) {
    if (info.type == JITDebugInfo::JIT_DEBUG_JIT_CODE) {
      uint64_t timestamp =
          jit_debug_reader_->SyncWithRecords() ? info.timestamp : last_record_timestamp_;
      Mmap2Record record(dumping_attr_id_.attr, false, info.pid, info.pid, info.jit_code_addr,
                         info.jit_code_len, info.file_offset, map_flags::PROT_JIT_SYMFILE_MAP,
                         info.file_path, dumping_attr_id_.ids[0], timestamp);
      if (!ProcessRecord(&record)) {
        return false;
      }
    } else {
      if (!info.symbols.empty()) {
        Dso* dso = thread_tree_.FindUserDsoOrNew(info.file_path, 0, DSO_DEX_FILE);
        dso->SetSymbols(&info.symbols);
      }
      if (info.dex_file_map) {
        ThreadMmap& map = *info.dex_file_map;
        uint64_t timestamp =
            jit_debug_reader_->SyncWithRecords() ? info.timestamp : last_record_timestamp_;
        Mmap2Record record(dumping_attr_id_.attr, false, info.pid, info.pid, map.start_addr,
                           map.len, map.pgoff, map.prot, map.name, dumping_attr_id_.ids[0],
                           timestamp);
        if (!ProcessRecord(&record)) {
          return false;
        }
      }
      thread_tree_.AddDexFileOffset(info.file_path, info.dex_file_offset);
    }
  }
  // We want to let samples see the most recent JIT maps generated before them, but no JIT maps
  // generated after them. So process existing samples each time generating new JIT maps. We prefer
  // to process samples after processing JIT maps. Because some of the samples may hit the new JIT
  // maps, and we want to report them properly.
  if (sync_kernel_records && !event_selection_set_.SyncKernelBuffer()) {
    return false;
  }
  return true;
}

// Reads one control command line from stdin and applies it.
// "pause" disables events and "resume" enables them; anything else is reported as an error.
// Prints "ok" or "error" to stdout and returns the command result. On a read failure or EOF,
// asks the loop to exit and returns the result of ExitLoop().
bool RecordCommand::ProcessControlCmd(IOEventLoop* loop) {
  char* raw_line = nullptr;
  size_t raw_capacity = 0;
  if (getline(&raw_line, &raw_capacity, stdin) == -1) {
    // getline() may have allocated a buffer even on failure.
    free(raw_line);
    // When the simpleperf Java API destroys the simpleperf process, it also closes the stdin pipe.
    // So we may see EOF of stdin.
    return loop->ExitLoop();
  }
  std::string cmd = android::base::Trim(raw_line);
  free(raw_line);
  LOG(DEBUG) << "process control cmd: " << cmd;

  bool ok = false;
  const bool is_resume = (cmd == "resume");
  if (is_resume || cmd == "pause") {
    ok = event_selection_set_.SetEnableEvents(is_resume);
  } else {
    LOG(ERROR) << "unknown control cmd: " << cmd;
  }
  printf("%s\n", ok ? "ok" : "error");
  fflush(stdout);
  return ok;
}

template <class RecordType>
void UpdateMmapRecordForEmbeddedPath(RecordType& r, bool has_prot, uint32_t prot) {
  if (r.InKernel()) {
    return;
  }
  std::string filename = r.filename;
  bool name_changed = false;
  // Some vdex files in map files are marked with deleted flag, but they exist in the file
  // system.
  // It may be because a new file is used to replace the old one, but still worth to try.
  if (android::base::EndsWith(filename, " (deleted)")) {
    filename.resize(filename.size() - 10);
    name_changed = true;
  }
  if (r.data->pgoff != 0 && (!has_prot || (prot & PROT_EXEC))) {
    // For the case of a shared library "foobar.so" embedded
    // inside an APK, we rewrite the original MMAP from
    // ["path.apk" offset=X] to ["path.apk!/foobar.so" offset=W]
    // so as to make the library name explicit. This update is
    // done here (as part of the record operation) as opposed to
    // on the host during the report, since we want to report
    // the correct library name even if the the APK in question
    // is not present on the host. The new offset W is
    // calculated to be with respect to the start of foobar.so,
    // not to the start of path.apk.
    EmbeddedElf* ee = ApkInspector::FindElfInApkByOffset(filename, r.data->pgoff);
    if (ee != nullptr) {
      // Compute new offset relative to start of elf in APK.
      auto data = *r.data;
      data.pgoff -= ee->entry_offset();
      r.SetDataAndFilename(data, GetUrlInApk(filename, ee->entry_name()));
      return;
    }
  }
  std::string zip_path;
  std::string entry_name;
  if (ParseExtractedInMemoryPath(filename, &zip_path, &entry_name)) {
    filename = GetUrlInApk(zip_path, entry_name);
    name_changed = true;
  }
  if (name_changed) {
    auto data = *r.data;
    r.SetDataAndFilename(data, filename);
  }
}

// Adjusts a record in place before it is saved:
//   - mmap/mmap2 records get their embedded paths rewritten;
//   - comm records for a process main thread (pid == tid) get the complete process name, when
//     GetCompleteProcessName() returns a non-empty one.
void RecordCommand::UpdateRecord(Record* record) {
  switch (record->type()) {
    case PERF_RECORD_MMAP: {
      // Plain mmap records carry no protection flags.
      UpdateMmapRecordForEmbeddedPath(*static_cast<MmapRecord*>(record), false, 0);
      break;
    }
    case PERF_RECORD_MMAP2: {
      auto* mmap2 = static_cast<Mmap2Record*>(record);
      UpdateMmapRecordForEmbeddedPath(*mmap2, true, mmap2->data->prot);
      break;
    }
    case PERF_RECORD_COMM: {
      auto* comm = static_cast<CommRecord*>(record);
      if (comm->data->pid == comm->data->tid) {
        std::string process_name = GetCompleteProcessName(comm->data->pid);
        if (!process_name.empty()) {
          comm->SetCommandName(process_name);
        }
      }
      break;
    }
    default:
      break;
  }
}

// Unwinds the user stack of a sample record and replaces its register and stack fields with
// the resulting call chain.
// Returns false when unwinding, saving a failed unwinding result, or adding the call chain to
// the callchain joiner fails; true otherwise.
bool RecordCommand::UnwindRecord(SampleRecord& r) {
  // Leave the record untouched when it has no callchain field but does have user register
  // (with a non-zero reg mask) and user stack fields.
  // NOTE(review): the rationale for this early return isn't visible here - confirm with callers.
  if (!(r.sample_type & PERF_SAMPLE_CALLCHAIN) && (r.sample_type & PERF_SAMPLE_REGS_USER) &&
      (r.regs_user_data.reg_mask != 0) && (r.sample_type & PERF_SAMPLE_STACK_USER)) {
    return true;
  }
  if (r.GetValidStackSize() > 0) {
    ThreadEntry* thread = thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
    RegSet regs(r.regs_user_data.abi, r.regs_user_data.reg_mask, r.regs_user_data.regs);
    // Instruction pointers and stack pointers filled in by the unwinder.
    std::vector<uint64_t> ips;
    std::vector<uint64_t> sps;
    if (!offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data,
                                            r.GetValidStackSize(), &ips, &sps)) {
      return false;
    }
    // The unwinding may fail if JIT debug info isn't the latest. In this case, read JIT debug info
    // from the process and retry unwinding.
    if (jit_debug_reader_ && !post_unwind_ &&
        offline_unwinder_->IsCallChainBrokenForIncompleteJITDebugInfo()) {
      // The results of ReadProcess() and FlushDebugInfo() are not checked here; only the retried
      // unwinding decides success.
      jit_debug_reader_->ReadProcess(r.tid_data.pid);
      jit_debug_reader_->FlushDebugInfo(r.Timestamp());
      if (!offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data,
                                              r.GetValidStackSize(), &ips, &sps)) {
        return false;
      }
    }
    // Must run before ReplaceRegAndStackWithCallChain(), since it reads the register and stack
    // data of the record.
    if (keep_failed_unwinding_result_ && !KeepFailedUnwindingResult(r, ips, sps)) {
      return false;
    }
    r.ReplaceRegAndStackWithCallChain(ips);
    if (callchain_joiner_ &&
        !callchain_joiner_->AddCallChain(r.tid_data.pid, r.tid_data.tid,
                                         CallChainJoiner::ORIGINAL_OFFLINE, ips, sps)) {
      return false;
    }
  } else {
    // For kernel samples, we still need to remove user stack and register fields.
    r.ReplaceRegAndStackWithCallChain({});
  }
  return true;
}

// Writes an UnwindingResultRecord when the last unwinding reported an error.
// With keep_failed_unwinding_debug_info_ set, the record also carries the sample's user
// registers, user stack, and the unwound ips/sps; otherwise those fields are left empty.
// Returns true when there was no error, otherwise the result of writing the record.
bool RecordCommand::KeepFailedUnwindingResult(const SampleRecord& r,
                                              const std::vector<uint64_t>& ips,
                                              const std::vector<uint64_t>& sps) {
  auto& unwinding_result = offline_unwinder_->GetUnwindingResult();
  if (unwinding_result.error_code == unwindstack::ERROR_NONE) {
    return true;
  }
  if (!keep_failed_unwinding_debug_info_) {
    return record_file_writer_->WriteRecord(
        UnwindingResultRecord(r.time_data.time, unwinding_result, {}, {}, {}, {}));
  }
  return record_file_writer_->WriteRecord(UnwindingResultRecord(
      r.time_data.time, unwinding_result, r.regs_user_data, r.stack_user_data, ips, sps));
}

// Closes the current record file, moves it to |old_filename|, and starts a new record file at
// record_filename_ that reuses the attr section of the moved file.
// Returns a reader for the moved file, or nullptr when any step fails.
std::unique_ptr<RecordFileReader> RecordCommand::MoveRecordFile(const std::string& old_filename) {
  const bool writer_closed =
      record_file_writer_->FinishWritingDataSection() && record_file_writer_->Close();
  if (!writer_closed) {
    return nullptr;
  }
  record_file_writer_.reset();

  std::error_code rename_error;
  std::filesystem::rename(record_filename_, old_filename, rename_error);
  if (rename_error) {
    LOG(DEBUG) << "Failed to rename: " << rename_error.message();
    // rename() fails on Android N x86 emulator, which uses kernel 3.10. Because rename() in bionic
    // uses renameat2 syscall, which isn't support on kernel < 3.15. So add a fallback to mv
    // command. The mv command can also work with other situations when rename() doesn't work.
    // So we'd like to keep it as a fallback to rename().
    const bool moved = Workload::RunCmd({"mv", record_filename_, old_filename});
    if (!moved) {
      return nullptr;
    }
  }

  auto old_file_reader = RecordFileReader::CreateInstance(old_filename);
  if (!old_file_reader) {
    return nullptr;
  }

  record_file_writer_ = CreateRecordFile(record_filename_, old_file_reader->AttrSection());
  if (!record_file_writer_) {
    return nullptr;
  }
  return old_file_reader;
}

bool RecordCommand::PostUnwindRecords() {
  auto tmp_file = ScopedTempFiles::CreateTempFile();
  auto reader = MoveRecordFile(tmp_file->path);
  if (!reader) {
    return false;
  }
  // Write new event attrs without regs and stacks fields.
  EventAttrIds attrs = reader->AttrSection();
  for (auto& attr : attrs) {
    ReplaceRegAndStackWithCallChain(attr.attr);
  }
  if (!record_file_writer_->WriteAttrSection(attrs)) {
    return false;
  }

  sample_record_count_ = 0;
  auto callback = [this](std::unique_ptr<Record> record) {
    return SaveRecordAfterUnwinding(record.get());
  };
  return reader->ReadDataSection(callback);
}

bool RecordCommand::JoinCallChains() {
  // 1. Prepare joined callchains.
  if (!callchain_joiner_->JoinCallChains()) {
    return false;
  }
  // 2. Move records from record_filename_ to a temporary file.
  auto tmp_file = ScopedTempFiles::CreateTempFile();
  auto reader = MoveRecordFile(tmp_file->path);
  if (!reader) {
    return false;
  }

  // 3. Read records from the temporary file, and write record with joined call chains back
  // to record_filename_.
  auto record_callback = [&](std::unique_ptr<Record> r) {
    if (r->type() != PERF_RECORD_SAMPLE) {
      return record_file_writer_->WriteRecord(*r);
    }
    SampleRecord& sr = *static_cast<SampleRecord*>(r.get());
    if (!sr.HasUserCallChain()) {
      return record_file_writer_->WriteRecord(sr);
    }
    pid_t pid;
    pid_t tid;
    CallChainJoiner::ChainType type;
    std::vector<uint64_t> ips;
    std::vector<uint64_t> sps;
    if (!callchain_joiner_->GetNextCallChain(pid, tid, type, ips, sps)) {
      return false;
    }
    CHECK_EQ(type, CallChainJoiner::JOINED_OFFLINE);
    CHECK_EQ(pid, static_cast<pid_t>(sr.tid_data.pid));
    CHECK_EQ(tid, static_cast<pid_t>(sr.tid_data.tid));
    sr.UpdateUserCallChain(ips);
    return record_file_writer_->WriteRecord(sr);
  };
  return reader->ReadDataSection(record_callback);
}

// Loads the perf-<pid>.map symbol map file of a process, if it has any symbols, and adds the
// symbols to |thread_tree| for that process.
//   package: app package name; when empty, the file is looked up under /data/local/tmp.
static void LoadSymbolMapFile(int pid, const std::string& package, ThreadTree* thread_tree) {
  // On Linux, symbol map files usually go to /tmp/perf-<pid>.map
  // On Android, there is no directory where any process can create files.
  // For now, use /data/local/tmp/perf-<pid>.map, which works for standalone programs,
  // and /data/data/<package>/perf-<pid>.map, which works for apps.
  std::string map_path;
  if (package.empty()) {
    map_path = android::base::StringPrintf("/data/local/tmp/perf-%d.map", pid);
  } else {
    map_path = android::base::StringPrintf("/data/data/%s/perf-%d.map", package.c_str(), pid);
  }

  auto symbols = ReadSymbolMapFromFile(map_path);
  if (symbols.empty()) {
    return;
  }
  thread_tree->AddSymbolsForProcess(pid, &symbols);
}

// Writes the feature sections of the record file.
// First replays map records and (when needed) the data section to collect which files and
// symbols were hit, then writes each feature in order.
//   args: the arguments of the record command, stored in the cmdline feature after the
//         executable path and "record".
// Returns false as soon as reading records or writing a feature fails.
bool RecordCommand::DumpAdditionalFeatures(const std::vector<std::string>& args) {
  // Read data section of perf.data to collect hit file information.
  thread_tree_.ClearThreadAndMap();
  bool kernel_symbols_available = false;
  std::string kallsyms;
  if (event_selection_set_.NeedKernelSymbol() && LoadKernelSymbols(&kallsyms)) {
    Dso::SetKallsyms(kallsyms);
    kernel_symbols_available = true;
  }
  // Pids whose symbol map file has already been looked up.
  std::unordered_set<int> loaded_symbol_maps;
  const std::vector<uint64_t>& auxtrace_offset = record_file_writer_->AuxTraceRecordOffsets();
  // Dsos hit by samples that directly follow an unwinding result record.
  std::unordered_set<Dso*> debug_unwinding_files;
  // Set when an unwinding result record was seen; consumed by the next sample record.
  bool failed_unwinding_sample = false;

  auto callback = [&](const Record* r) {
    thread_tree_.Update(*r);
    if (r->type() == PERF_RECORD_SAMPLE) {
      // SampleRecord derives from Record, so static_cast is the right downcast here.
      auto sample = static_cast<const SampleRecord*>(r);
      // Symbol map files are available after recording. Load one for the process.
      if (loaded_symbol_maps.insert(sample->tid_data.pid).second) {
        LoadSymbolMapFile(sample->tid_data.pid, app_package_name_, &thread_tree_);
      }
      if (failed_unwinding_sample) {
        failed_unwinding_sample = false;
        CollectHitFileInfo(*sample, &debug_unwinding_files);
      } else {
        CollectHitFileInfo(*sample, nullptr);
      }
    } else if (r->type() == SIMPLE_PERF_RECORD_UNWINDING_RESULT) {
      failed_unwinding_sample = true;
    }
  };

  if (map_record_thread_) {
    if (!map_record_thread_->Join()) {
      return false;
    }
    // If not dumping build id, we only need to read kernel maps, to dump kernel module addresses
    // in file feature section.
    if (!map_record_thread_->ReadMapRecords(callback, !dump_build_id_)) {
      return false;
    }
  }

  // We don't need to read data section when recording ETM data and not need to dump build ids.
  bool read_data_section = true;
  if (event_selection_set_.HasAuxTrace() && !dump_build_id_) {
    read_data_section = false;
  }

  if (read_data_section && !record_file_writer_->ReadDataSection(callback)) {
    return false;
  }

  // Features always written below: file, osrelease, arch, cmdline, meta info.
  // The count must match the number of features written afterwards.
  size_t feature_count = 5;
  if (dump_build_id_) {
    feature_count++;
  }
  if (branch_sampling_) {
    feature_count++;
  }
  if (!auxtrace_offset.empty()) {
    feature_count++;
  }
  if (keep_failed_unwinding_debug_info_) {
    // DumpDebugUnwindFeature() writes two kinds of feature: debug unwind and debug unwind file.
    feature_count += 2;
  }
  if (etm_branch_list_generator_) {
    feature_count++;
  }
  if (map_record_thread_) {
    feature_count++;
  }
  if (!record_file_writer_->BeginWriteFeatures(feature_count)) {
    return false;
  }
  if (dump_build_id_ && !DumpBuildIdFeature()) {
    return false;
  }
  if (!DumpFileFeature()) {
    return false;
  }
  utsname uname_buf;
  if (TEMP_FAILURE_RETRY(uname(&uname_buf)) != 0) {
    PLOG(ERROR) << "uname() failed";
    return false;
  }
  if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_OSRELEASE, uname_buf.release)) {
    return false;
  }
  if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_ARCH, uname_buf.machine)) {
    return false;
  }

  // Rebuild the command line as "<executable path> record <args...>".
  std::string exec_path = android::base::GetExecutablePath();
  if (exec_path.empty()) exec_path = "simpleperf";
  std::vector<std::string> cmdline;
  cmdline.push_back(exec_path);
  cmdline.push_back("record");
  cmdline.insert(cmdline.end(), args.begin(), args.end());
  if (!record_file_writer_->WriteCmdlineFeature(cmdline)) {
    return false;
  }
  if (branch_sampling_ != 0 && !record_file_writer_->WriteBranchStackFeature()) {
    return false;
  }
  if (!DumpMetaInfoFeature(kernel_symbols_available)) {
    return false;
  }
  if (!auxtrace_offset.empty() && !record_file_writer_->WriteAuxTraceFeature(auxtrace_offset)) {
    return false;
  }
  if (keep_failed_unwinding_debug_info_ && !DumpDebugUnwindFeature(debug_unwinding_files)) {
    return false;
  }
  if (etm_branch_list_generator_ && !DumpETMBranchListFeature()) {
    return false;
  }
  if (map_record_thread_ && !DumpInitMapFeature()) {
    return false;
  }

  if (!record_file_writer_->EndWriteFeatures()) {
    return false;
  }
  return true;
}

bool RecordCommand::DumpBuildIdFeature() {
  std::vector<BuildIdRecord> build_id_records;
  BuildId build_id;
  std::vector<Dso*> dso_v = thread_tree_.GetAllDsos();
  for (Dso* dso : dso_v) {
    // For aux tracing, we don't know which binaries are traced.
    // So dump build ids for all binaries.
    if (!dso->HasDumpId() && !event_selection_set_.HasAuxTrace()) {
      continue;
    }
    if (GetBuildId(*dso, build_id)) {
      bool in_kernel = dso->type() == DSO_KERNEL || dso->type() == DSO_KERNEL_MODULE;
      build_id_records.emplace_back(in_kernel, UINT_MAX, build_id, dso->Path());
    }
  }
  if (!record_file_writer_->WriteBuildIdFeature(build_id_records)) {
    return false;
  }
  return true;
}

// Writes the file feature for all dsos in the thread tree. When aux trace is recorded and the
// kernel isn't excluded, dump ids are created for kernel modules first.
bool RecordCommand::DumpFileFeature() {
  std::vector<Dso*> all_dsos = thread_tree_.GetAllDsos();
  // To parse ETM data for kernel modules, we need to dump memory address for kernel modules.
  const bool dump_kernel_modules =
      event_selection_set_.HasAuxTrace() && !event_selection_set_.ExcludeKernel();
  if (dump_kernel_modules) {
    for (Dso* dso : all_dsos) {
      if (dso->type() != DSO_KERNEL_MODULE) {
        continue;
      }
      dso->CreateDumpId();
    }
  }
  return record_file_writer_->WriteFileFeatures(all_dsos);
}

// Writes the meta info feature: a string map seeded from extra_meta_info_ and extended with
// version, recording options, device properties (Android only), and record statistics.
//   kernel_symbols_available: stored as the "kernel_symbols_available" entry.
bool RecordCommand::DumpMetaInfoFeature(bool kernel_symbols_available) {
  const auto bool_text = [](bool value) { return value ? "true" : "false"; };

  std::unordered_map<std::string, std::string> meta = extra_meta_info_;
  meta["simpleperf_version"] = GetSimpleperfVersion();
  meta["system_wide_collection"] = bool_text(system_wide_collection_);
  meta["trace_offcpu"] = bool_text(trace_offcpu_);
  // By storing event types information in perf.data, the readers of perf.data have the same
  // understanding of event types, even if they are on another machine.
  meta["event_type_info"] = ScopedEventTypes::BuildString(event_selection_set_.GetEvents());
#if defined(__ANDROID__)
  const std::string manufacturer = android::base::GetProperty("ro.product.manufacturer", "");
  const std::string model = android::base::GetProperty("ro.product.model", "");
  const std::string product_name = android::base::GetProperty("ro.product.name", "");
  meta["product_props"] = android::base::StringPrintf("%s:%s:%s", manufacturer.c_str(),
                                                      model.c_str(), product_name.c_str());
  meta["android_version"] = android::base::GetProperty("ro.build.version.release", "");
  meta["android_sdk_version"] = android::base::GetProperty("ro.build.version.sdk", "");
  meta["android_build_type"] = android::base::GetProperty("ro.build.type", "");
  meta["android_build_fingerprint"] = android::base::GetProperty("ro.build.fingerprint", "");
  // The kernel version entry is only added when uname() succeeds.
  utsname kernel_info;
  if (uname(&kernel_info) == 0) {
    meta["kernel_version"] = kernel_info.release;
  }
  if (!app_package_name_.empty()) {
    meta["app_package_name"] = app_package_name_;
    if (IsRoot()) {
      meta["app_type"] = GetAppType(app_package_name_);
    }
  }
  if (event_selection_set_.HasAuxTrace()) {
    // used by --exclude-perf in cmd_inject.cpp
    meta["recording_process"] = std::to_string(getpid());
  }
#endif
  meta["clockid"] = clockid_;
  meta["timestamp"] = std::to_string(time(nullptr));
  meta["kernel_symbols_available"] = bool_text(kernel_symbols_available);
  if (dwarf_callchain_sampling_ && !unwind_dwarf_callchain_) {
    OfflineUnwinder::CollectMetaInfo(&meta);
  }
  auto stat = event_selection_set_.GetRecordStat();
  meta["record_stat"] = android::base::StringPrintf(
      "sample_record_count=%" PRIu64
      ",kernelspace_lost_records=%zu,userspace_lost_samples=%zu,"
      "userspace_lost_non_samples=%zu,userspace_truncated_stack_samples=%zu",
      sample_record_count_, stat.kernelspace_lost_records, stat.userspace_lost_samples,
      stat.userspace_lost_non_samples, stat.userspace_truncated_stack_samples);

  return record_file_writer_->WriteMetaInfoFeature(meta);
}

// Stores the debug files of the given ELF dsos in the record file: the content of each file
// that can be opened is written as a debug unwind file feature, and a summary (path and size
// per file) is written as the debug unwind feature. Non-ELF dsos are skipped; files that can't
// be opened are logged and skipped.
bool RecordCommand::DumpDebugUnwindFeature(const std::unordered_set<Dso*>& dso_set) {
  DebugUnwindFeature kept_files;
  kept_files.reserve(dso_set.size());
  for (const Dso* dso : dso_set) {
    if (dso->type() != DSO_ELF_FILE) {
      continue;
    }
    const std::string& debug_path = dso->GetDebugFilePath();
    std::unique_ptr<ElfFile> elf = ElfFile::Open(debug_path);
    if (!elf) {
      LOG(WARNING) << "failed to keep " << debug_path << " in debug_unwind_feature section";
      continue;
    }
    llvm::MemoryBuffer* content = elf->GetMemoryBuffer();
    // Append a new entry describing this file.
    kept_files.resize(kept_files.size() + 1);
    auto& entry = kept_files.back();
    entry.path = debug_path;
    entry.size = content->getBufferSize();
    const bool written = record_file_writer_->WriteFeature(
        PerfFileFormat::FEAT_DEBUG_UNWIND_FILE, content->getBufferStart(),
        content->getBufferSize());
    if (!written) {
      return false;
    }
  }
  return record_file_writer_->WriteDebugUnwindFeature(kept_files);
}

void RecordCommand::CollectHitFileInfo(const SampleRecord& r, std::unordered_set<Dso*>* dso_set) {
  const ThreadEntry* thread = thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
  size_t kernel_ip_count;
  std::vector<uint64_t> ips = r.GetCallChain(&kernel_ip_count);
  if ((r.sample_type & PERF_SAMPLE_BRANCH_STACK) != 0) {
    for (uint64_t i = 0; i < r.branch_stack_data.stack_nr; ++i) {
      const auto& item = r.branch_stack_data.stack[i];
      ips.push_back(item.from);
      ips.push_back(item.to);
    }
  }
  for (size_t i = 0; i < ips.size(); i++) {
    const MapEntry* map = thread_tree_.FindMap(thread, ips[i], i < kernel_ip_count);
    Dso* dso = map->dso;
    if (dump_symbols_) {
      const Symbol* symbol = thread_tree_.FindSymbol(map, ips[i], nullptr, &dso);
      if (!symbol->HasDumpId()) {
        dso->CreateSymbolDumpId(symbol);
      }
    }
    if (!dso->HasDumpId() && dso->type() != DSO_UNKNOWN_FILE) {
      dso->CreateDumpId();
    }
    if (dso_set != nullptr) {
      dso_set->insert(dso);
    }
  }
}

bool RecordCommand::DumpETMBranchListFeature() {
  ETMBinaryMap binary_map = etm_branch_list_generator_->GetETMBinaryMap();
  std::string s;
  if (!ETMBinaryMapToString(binary_map, s)) {
    return false;
  }
  return record_file_writer_->WriteFeature(PerfFileFormat::FEAT_ETM_BRANCH_LIST, s.data(),
                                           s.size());
}

bool RecordCommand::DumpInitMapFeature() {
  if (!map_record_thread_->Join()) {
    return false;
  }
  auto callback = [&](const char* data, size_t size) {
    return record_file_writer_->WriteInitMapFeature(data, size);
  };
  return map_record_thread_->ReadMapRecordData(callback) &&
         record_file_writer_->FinishWritingInitMapFeature();
}

}  // namespace

// If the text at |p| starts with |s|, advances |p| past it and returns true.
// Otherwise leaves |p| unchanged and returns false.
static bool ConsumeStr(const char*& p, const char* s) {
  const size_t prefix_len = strlen(s);
  if (strncmp(p, s, prefix_len) != 0) {
    return false;
  }
  p += prefix_len;
  return true;
}

// Parses an unsigned address (decimal, octal, or hex, as accepted by strtoull() with base 0)
// at |p|. Leading whitespace is skipped by strtoull().
// On success stores the value in |*addr|, advances |p| past the number, and returns true.
// On failure (no digits, out of range, or a negative number) returns false and leaves |p|
// unchanged; |*addr| is unspecified in that case.
static bool ConsumeAddr(const char*& p, uint64_t* addr) {
  errno = 0;
  char* end;
  *addr = strtoull(p, &end, 0);
  if (errno != 0 || p == end) {
    return false;
  }
  // strtoull() accepts a leading '-' and returns the negated value, which would turn "-1" into
  // a huge address. Within the consumed text, '-' can only be that sign, so reject it.
  if (memchr(p, '-', static_cast<size_t>(end - p)) != nullptr) {
    return false;
  }
  p = end;
  return true;
}

// Parses one address filter and appends the resulting AddrFilter(s) to |filters|.
// Supported forms (exactly one space between the keyword and its argument):
//   start <kernel_addr>
//   start <vaddr>@<file_path>
//   stop <kernel_addr>
//   stop <vaddr>@<file_path>
//   filter <kernel_addr_start>-<kernel_addr_end>
//   filter <vaddr_start>-<vaddr_end>@<file_path>
//   filter <file_path>
// For file based filters, virtual addresses are converted to file offsets and the path is
// resolved with Realpath(). Ranges whose end is before their start are rejected, because the
// unsigned size computation would otherwise wrap around.
// Returns true on success, false when the string matches none of the forms.
// To reduce function length, not all format errors are checked.
static bool ParseOneAddrFilter(const std::string& s, std::vector<AddrFilter>* filters) {
  std::vector<std::string> args = android::base::Split(s, " ");
  if (args.size() != 2) {
    return false;
  }

  uint64_t addr1;
  uint64_t addr2;
  uint64_t off1;
  uint64_t off2;
  std::string path;

  // Each form below re-parses the string from its beginning.
  if (auto p = s.data(); ConsumeStr(p, "start") && ConsumeAddr(p, &addr1)) {
    if (*p == '\0') {
      // start <kernel_addr>
      filters->emplace_back(AddrFilter::KERNEL_START, addr1, 0, "");
      return true;
    }
    if (ConsumeStr(p, "@") && *p != '\0') {
      // start <vaddr>@<file_path>
      if (auto elf = ElfFile::Open(p); elf && elf->VaddrToOff(addr1, &off1) && Realpath(p, &path)) {
        filters->emplace_back(AddrFilter::FILE_START, off1, 0, path);
        return true;
      }
    }
  }
  if (auto p = s.data(); ConsumeStr(p, "stop") && ConsumeAddr(p, &addr1)) {
    if (*p == '\0') {
      // stop <kernel_addr>
      filters->emplace_back(AddrFilter::KERNEL_STOP, addr1, 0, "");
      return true;
    }
    if (ConsumeStr(p, "@") && *p != '\0') {
      // stop <vaddr>@<file_path>
      if (auto elf = ElfFile::Open(p); elf && elf->VaddrToOff(addr1, &off1) && Realpath(p, &path)) {
        filters->emplace_back(AddrFilter::FILE_STOP, off1, 0, path);
        return true;
      }
    }
  }
  if (auto p = s.data(); ConsumeStr(p, "filter") && ConsumeAddr(p, &addr1) && ConsumeStr(p, "-") &&
                         ConsumeAddr(p, &addr2)) {
    if (*p == '\0') {
      // filter <kernel_addr_start>-<kernel_addr_end>
      if (addr2 < addr1) {
        // An inverted range would make the size wrap around.
        return false;
      }
      filters->emplace_back(AddrFilter::KERNEL_RANGE, addr1, addr2 - addr1, "");
      return true;
    }
    if (ConsumeStr(p, "@") && *p != '\0') {
      // filter <vaddr_start>-<vaddr_end>@<file_path>
      if (auto elf = ElfFile::Open(p); elf && elf->VaddrToOff(addr1, &off1) &&
                                       elf->VaddrToOff(addr2, &off2) && off1 <= off2 &&
                                       Realpath(p, &path)) {
        filters->emplace_back(AddrFilter::FILE_RANGE, off1, off2 - off1, path);
        return true;
      }
    }
  }
  if (auto p = s.data(); ConsumeStr(p, "filter") && *p != '\0') {
    // filter <file_path>
    path = android::base::Trim(p);
    if (auto elf = ElfFile::Open(path); elf) {
      // One file range filter per executable segment.
      for (const ElfSegment& seg : elf->GetProgramHeader()) {
        if (seg.is_executable) {
          filters->emplace_back(AddrFilter::FILE_RANGE, seg.file_offset, seg.file_size, path);
        }
      }
      return true;
    }
  }
  return false;
}

std::vector<AddrFilter> ParseAddrFilterOption(const std::string& s) {
  std::vector<AddrFilter> filters;
  for (const auto& str : android::base::Split(s, ",")) {
    if (!ParseOneAddrFilter(str, &filters)) {
      LOG(ERROR) << "failed to parse addr filter: " << str;
      return {};
    }
  }
  return filters;
}

// Registers the "record" command with a factory that creates a new RecordCommand.
void RegisterRecordCommand() {
  RegisterCommand("record",
                  []() -> std::unique_ptr<Command> { return std::make_unique<RecordCommand>(); });
}

}  // namespace simpleperf
