/*
 * Copyright (C) 2022 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/trace_processor/util/glob.h"

#include "perfetto/ext/base/string_utils.h"

namespace perfetto {
namespace trace_processor {
namespace util {

GlobMatcher::GlobMatcher(base::StringView pattern_str)
    : pattern_(pattern_str.size() + 1) {
  base::StringCopy(pattern_.data(), pattern_str.data(), pattern_.size());

  base::StringView pattern(pattern_.data());

  // Note: see the class header for how this algorithm works.
  uint32_t segment_start = 0;
  uint32_t segment_potential_matched_chars = 0;
  auto create_segment = [this, &segment_start, &segment_potential_matched_chars,
                         &pattern](size_t i) {
    base::StringView segment = pattern.substr(segment_start, i - segment_start);
    PERFETTO_DCHECK(segment_potential_matched_chars <= segment.size());
    if (!segment.empty()) {
      PERFETTO_DCHECK(segment_potential_matched_chars > 0);
      segments_.emplace_back(Segment{segment, segment_potential_matched_chars});
    }
    return segment.empty();
  };

  for (uint32_t i = 0; i < pattern.size(); ++i) {
    char c = pattern.at(i);

    // If we don't have a star, we are only matching a single character (but
    // potentially with a character class which contains >1 character).
    if (c != '*') {
      if (c == '[') {
        base::StringView cclass = ExtractCharacterClass(pattern.substr(i + 1));
        contains_char_class_or_question_ |= !cclass.empty();

        // Skip the current '[' character.
        ++i;

        // Skip the whole character class. This will leave i pointing at the
        // terminating character (i.e. ']'). With the ++i in the loop, this will
        // correctly lead us going past the whole class.
        i += cclass.size();
      }

      contains_char_class_or_question_ |= c == '?';
      ++segment_potential_matched_chars;
      continue;
    }

    // Add the characters collected so far as a segment.
    create_segment(i);
    segment_start = i + 1;
    segment_potential_matched_chars = 0;
  }

  // Ensure we add any remaining characters as a segment.
  bool empty_segment = create_segment(pattern.size());
  leading_star_ = !pattern.empty() && pattern.at(0) == '*';
  trailing_star_ = !pattern.empty() && empty_segment;
}

bool GlobMatcher::Matches(base::StringView in) const {
  // If there are no segments, that means the pattern is either '' or '*'
  // (or '**', '***' etc which is really the same as '*'). This means
  // we are match if either a) there is a leading star (== trailing star) or
  // b) the input string is empty.
  if (segments_.empty()) {
    PERFETTO_DCHECK(leading_star_ == trailing_star_);
    return leading_star_ || in.empty();
  }

  // If there is only one segment and no stars we need an equality match.
  // As we still need to handle '[..]' and '?', we cannot just use string
  // equality. We *can* however use StartsWith and check the matched
  // characters is equal to the length of the input: this is basically the
  // same as checking equality.
  if (segments_.size() == 1 && !leading_star_ && !trailing_star_) {
    return segments_.front().matched_chars == in.size() &&
           StartsWith(in, segments_.front());
  }

  // If there's no leading star, the first segment needs to be handled
  // separately because it *needs* to be anchored to the left of the
  // string rather than appearing at some point later.
  if (!leading_star_ && !StartsWith(in, segments_.front())) {
    return false;
  }

  // Similarily, if there's no trailing star, the last segment needs to be
  // "anchored" to the right of the string.
  if (!trailing_star_ && !EndsWith(in, segments_.back())) {
    return false;
  }

  // For any segment we haven't checked, they needs to appear in the string
  // sequentially with possibly some characters separating them. To handle
  // this, we just need to iteratively find each segment, starting from the
  // previous segment.
  const Segment* segment_start = segments_.begin() + (leading_star_ ? 0 : 1);
  const Segment* segment_end = segments_.end() - (trailing_star_ ? 0 : 1);
  size_t find_idx = leading_star_ ? 0 : segments_.front().matched_chars;
  for (const auto* segment = segment_start; segment < segment_end; ++segment) {
    size_t pos = Find(in, *segment, find_idx);
    if (pos == base::StringView::npos) {
      return false;
    }
    find_idx = pos + segment->matched_chars;
  }

  // Every segment has been found to match so far including the leading and
  // trailing one so the entire string matches!
  return true;
}

bool GlobMatcher::StartsWithSlow(base::StringView in,
                                 const Segment& segment) const {
  base::StringView pattern = segment.pattern;
  for (uint32_t i = 0, p = 0; p < pattern.size(); ++i, ++p) {
    // We've run out of characters to consume in the input but still have more
    // to consume in the pattern: |in| cannot possibly start with |pattern|.
    if (i >= in.size()) {
      return false;
    }

    char in_c = in.at(i);
    char pattern_c = pattern.at(p);

    // '?' matches any character.
    if (pattern_c == '?') {
      continue;
    }

    // '[' signifies the start of a character class.
    if (pattern_c == '[') {
      base::StringView cclass = ExtractCharacterClass(pattern.substr(p + 1));
      if (!MatchesCharacterClass(in_c, cclass)) {
        return false;
      }

      // Skip the current '[' character.
      p++;

      // Skip the whole character class. This will leave i pointing at the
      // terminating character (i.e. ']'). With the ++i in the loop, this will
      // correctly lead us going past the whole class.
      p += cclass.size();
      continue;
    }

    // Anything else is just an ordinary character.
    if (in_c != pattern_c) {
      return false;
    }
  }
  return true;
}

base::StringView GlobMatcher::ExtractCharacterClass(base::StringView in) {
  if (in.empty())
    return base::StringView();

  // We should always skip the first real character: it could be ']' but if
  // so, it is treated as a normal character because empty classes are not
  // valid.
  bool invert = in.at(0) == '^';
  size_t end_idx = in.find(']', invert ? 2 : 1);
  return end_idx == base::StringView::npos ? base::StringView()
                                           : in.substr(0, end_idx);
}

bool GlobMatcher::MatchesCharacterClass(char in, base::StringView char_class) {
  PERFETTO_DCHECK(!char_class.empty());

  const char* start = char_class.data();
  const char* end = start + char_class.size();

  bool invert = *start == '^';
  start += invert;

  PERFETTO_DCHECK(start != end);

  for (auto* ptr = start; ptr != end; ++ptr) {
    char cur = *ptr;

    // If we see a '-' at any point except at the start or end of the string,
    // it represents a matching range (e.g. a-z represents matching any
    // character between a and z).
    if (cur == '-' && ptr != start && ptr != end - 1) {
      // Look at the previous and next characters in the class and check if the
      // character falls in the range.
      char range_start = ptr[-1];
      char range_end = ptr[1];
      if (range_start <= in && in <= range_end) {
        return !invert;
      }
      continue;
    }

    // We match a character in the class.
    if (in == cur) {
      return !invert;
    }
  }

  // If we're here, nothing in the class matched: return whether the class was
  // inverted as this would actually be a match.
  return invert;
}

}  // namespace util
}  // namespace trace_processor
}  // namespace perfetto
