/*
 * Copyright (C) 2022 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef SRC_TRACE_PROCESSOR_UTIL_GLOB_H_
#define SRC_TRACE_PROCESSOR_UTIL_GLOB_H_

#include <optional>
#include <string_view>
#include <vector>

#include "perfetto/ext/base/small_vector.h"
#include "perfetto/ext/base/string_splitter.h"
#include "perfetto/ext/base/string_view.h"

namespace perfetto {
namespace trace_processor {
namespace util {

// Lightweight implementation of matching on UNIX glob patterns, maintaining
// compatibility of syntax and semantics used by SQLite.
//
// Usage:
//  GlobMatcher matcher = GlobMatcher::FromPattern("*foo*");
//  for (auto string : strings) {
//    if (matcher.Matches(string)) {
//      <do something>
//    }
//  }
//
// This is a class instead of a free function to allow preprocessing the
// pattern (e.g. to compute Kleene star offsets). This can create big savings
// because trace processsor needs to match the same pattern on many strings
// when filtering tables.
//
// Implementation:
// The algorithm used in this class is similar to the "alternative"
// algorithm proposed in [1].
//
// We preprocess the pattern (in the constructor) to split the pattern on *,
// accounting for character classes. This breaks the pattern in "segments": our
// name for the parts of the pattern between the stars.
//
// Then at match time, we go through each segment and check if it matches part
// of the string. The number of character matched defines the search start-point
// for the next segment. As described in [1], we don't need to do any
// backtracking which removes the exponential component of the algorithm and
// consequently simplifies the code.
//
// The subtle parts are:
// 1) the first and last segments - they need to be "anchored" to the
//    beginning and end of the string respectively. If not, they fail the match
//    straight away.
// 2) leading/trailing stars: they counteract the above point and "unanchor"
//    the first and last segments respectively by allowing them to happen
//    somewhere after/before the beginning/end.
//
// [1] https://research.swtch.com/glob
class GlobMatcher {
 public:
  GlobMatcher(GlobMatcher&&) = default;
  GlobMatcher& operator=(GlobMatcher&&) = default;

  // Creates a glob matcher from a pattern.
  static GlobMatcher FromPattern(base::StringView pattern_str) {
    return GlobMatcher(std::move(pattern_str));
  }

  // Checks the provided string against the pattern and returns whether it
  // matches.
  bool Matches(base::StringView input) const;

  // Returns whether the comparison should really be an equality comparison.
  bool IsEquality() {
    return !leading_star_ && !trailing_star_ && segments_.size() <= 1;
  }

 private:
  // Represents a portion of the pattern in between two * characters.
  struct Segment {
    // The portion of the pattern in the segment. Note that this will not
    // contain a free '*' (i.e. outside a character class).
    base::StringView pattern;

    // The number of consumed characters in an input string if this segment
    // matches.
    uint32_t matched_chars;
  };

  // It would be very rare for a glob pattern to have more than 4 stars so
  // reserve stack space for that many segments.
  static constexpr uint32_t kMaxSegmentsOnStack = 4;

  explicit GlobMatcher(base::StringView pattern);

  GlobMatcher(const GlobMatcher&) = delete;
  GlobMatcher& operator=(const GlobMatcher&) = delete;

  // Returns whether |input| starts with the pattern in |segment| following
  // glob matching rules.
  bool StartsWith(base::StringView input, const Segment& segment) const {
    if (!contains_char_class_or_question_) {
      return input.StartsWith(segment.pattern);
    }
    return StartsWithSlow(input, segment);
  }

  // Returns whether |input| ends with the pattern in |segment| following
  // glob matching rules.
  bool EndsWith(base::StringView input, const Segment& segment) const {
    if (!contains_char_class_or_question_) {
      return input.EndsWith(segment.pattern);
    }
    // Ending with |segment| is the same as taking the substring of |in|
    size_t start = input.size() - segment.matched_chars;
    return StartsWithSlow(input.substr(start), segment);
  }

  // Returns the index where |input| matches the pattern in |segment|
  // following glob matching rules or base::StringView::npos, if no such index
  // exists.
  size_t Find(base::StringView input,
              const Segment& segment,
              size_t start) const {
    if (!contains_char_class_or_question_) {
      return input.find(segment.pattern, start);
    }
    for (uint32_t i = 0; i < input.size(); ++i) {
      if (StartsWithSlow(input.substr(i), segment)) {
        return i;
      }
    }
    return base::StringView::npos;
  }

  // Given a StringView starting at the boundary of a character class, returns
  // a StringView containing only the parts inside the [] or base::StringView()
  // if no character class exists.
  static base::StringView ExtractCharacterClass(base::StringView input);

  // Matches |in| against the given character class.
  static bool MatchesCharacterClass(char input, base::StringView char_class);

  bool StartsWithSlow(base::StringView input, const Segment& segment) const;

  // IMPORTANT: this should *not* be modified after the constructor as we store
  // pointers to the data inside here.
  // Note: this vector also allocates space for the null-terminator so is +1
  // the "traditional" size of the string.
  std::vector<char> pattern_;

  // Chunks of the |pattern_| tokenized on '*'. See the class comment for more
  // info.
  base::SmallVector<Segment, kMaxSegmentsOnStack> segments_;

  bool leading_star_ = false;
  bool trailing_star_ = false;
  bool contains_char_class_or_question_ = false;
};

}  // namespace util
}  // namespace trace_processor
}  // namespace perfetto

#endif  // SRC_TRACE_PROCESSOR_UTIL_GLOB_H_
