// Copyright 2011 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <stdint.h>

#include "base/i18n/string_search.h"

#include "base/check.h"
#include "base/check_op.h"
#include "third_party/icu/source/i18n/unicode/usearch.h"

namespace base {
namespace i18n {

FixedPatternStringSearch::FixedPatternStringSearch(
    const std::u16string& find_this,
    bool case_sensitive)
    : find_this_(find_this) {
  // usearch_open() needs a valid text to search even though the real text is
  // only provided later through usearch_setText(), so the pattern itself is
  // passed in as a placeholder text.
  UErrorCode status = U_ZERO_ERROR;
  search_ = usearch_open(/*pattern=*/find_this_.data(), find_this_.size(),
                         /*text=*/find_this_.data(), find_this_.size(),
                         uloc_getDefault(), /*breakiter=*/nullptr, &status);
  if (U_FAILURE(status))
    return;

  // See
  // http://icu-project.org/apiref/icu4c40/ucol_8h.html#6a967f36248b0a1bc7654f538ee8ba96
  // UCOL_PRIMARY ignores secondary and tertiary differences, while
  // UCOL_TERTIARY takes every comparison difference into account.
  //  - Diacritics on the same base letter are a secondary difference.
  //  - Upper vs. lower case of the same character is a tertiary difference.
  const auto strength = case_sensitive ? UCOL_TERTIARY : UCOL_PRIMARY;
  ucol_setStrength(usearch_getCollator(search_), strength);
  usearch_reset(search_);
}

FixedPatternStringSearch::~FixedPatternStringSearch() {
  // No ICU search handle to close.
  if (!search_)
    return;
  usearch_close(search_.ExtractAsDangling());
}

bool FixedPatternStringSearch::Search(const std::u16string& in_this,
                                      size_t* match_index,
                                      size_t* match_length,
                                      bool forward_search) {
  UErrorCode status = U_ZERO_ERROR;
  usearch_setText(search_, in_this.data(), in_this.size(), &status);

  // Default to basic substring search if usearch fails. According to
  // http://icu-project.org/apiref/icu4c/usearch_8h.html, usearch_open will fail
  // if either |find_this| or |in_this| are empty. In either case basic
  // substring search will give the correct return value.
  if (!U_SUCCESS(status)) {
    size_t index = in_this.find(find_this_);
    if (index == std::u16string::npos)
      return false;
    if (match_index)
      *match_index = index;
    if (match_length)
      *match_length = find_this_.size();
    return true;
  }

  int32_t index = forward_search ? usearch_first(search_, &status)
                                 : usearch_last(search_, &status);
  if (!U_SUCCESS(status) || index == USEARCH_DONE)
    return false;
  if (match_index)
    *match_index = static_cast<size_t>(index);
  if (match_length)
    *match_length = static_cast<size_t>(usearch_getMatchedLength(search_));
  return true;
}

// Builds the wrapped FixedPatternStringSearch for |find_this| with case
// sensitivity switched off.
FixedPatternStringSearchIgnoringCaseAndAccents::
    FixedPatternStringSearchIgnoringCaseAndAccents(
        const std::u16string& find_this)
    : base_search_(find_this, /*case_sensitive=*/false) {
}

// Forwards to the wrapped searcher, always requesting a forward search.
bool FixedPatternStringSearchIgnoringCaseAndAccents::Search(
    const std::u16string& in_this,
    size_t* match_index,
    size_t* match_length) {
  constexpr bool kForwardSearch = true;
  return base_search_.Search(in_this, match_index, match_length,
                             kForwardSearch);
}

bool StringSearchIgnoringCaseAndAccents(const std::u16string& find_this,
                                        const std::u16string& in_this,
                                        size_t* match_index,
                                        size_t* match_length) {
  return FixedPatternStringSearchIgnoringCaseAndAccents(find_this).Search(
      in_this, match_index, match_length);
}

bool StringSearch(const std::u16string& find_this,
                  const std::u16string& in_this,
                  size_t* match_index,
                  size_t* match_length,
                  bool case_sensitive,
                  bool forward_search) {
  return FixedPatternStringSearch(find_this, case_sensitive)
      .Search(in_this, match_index, match_length, forward_search);
}

// Opens an ICU string search for |find_this| within |in_this|. Both strings
// are copied into members, and the ICU searcher is opened on those copies.
// |case_sensitive| selects the collation strength used for comparison.
RepeatingStringSearch::RepeatingStringSearch(const std::u16string& find_this,
                                             const std::u16string& in_this,
                                             bool case_sensitive)
    : find_this_(find_this), in_this_(in_this) {
  UErrorCode status = U_ZERO_ERROR;
  // The default locale name is passed straight through, as in
  // FixedPatternStringSearch; no intermediate std::string copy is needed.
  search_ = usearch_open(find_this_.data(), find_this_.size(), in_this_.data(),
                         in_this_.size(), uloc_getDefault(),
                         /*breakiter=*/nullptr, &status);
  DCHECK(U_SUCCESS(status));
  // The DCHECK above is compiled out of non-debug builds, so the failure case
  // still has to be handled explicitly.
  if (U_SUCCESS(status)) {
    // http://icu-project.org/apiref/icu4c40/ucol_8h.html#6a967f36248b0a1bc7654f538ee8ba96
    // Set comparison level to UCOL_PRIMARY to ignore secondary and tertiary
    // differences. Set comparison level to UCOL_TERTIARY to include all
    // comparison differences.
    // Diacritical differences on the same base letter represent a
    // secondary difference.
    // Uppercase and lowercase versions of the same character represent a
    // tertiary difference.
    UCollator* collator = usearch_getCollator(search_);
    ucol_setStrength(collator, case_sensitive ? UCOL_TERTIARY : UCOL_PRIMARY);
    usearch_reset(search_);
  }
}

RepeatingStringSearch::~RepeatingStringSearch() {
  // No ICU search handle to close.
  if (!search_)
    return;
  usearch_close(search_.ExtractAsDangling());
}

// Advances the ICU searcher to the next match. Returns false, leaving both
// output references untouched, when ICU reports an error or there are no
// further matches; otherwise stores the match offset and length and returns
// true.
bool RepeatingStringSearch::NextMatchResult(int& match_index,
                                            int& match_length) {
  UErrorCode status = U_ZERO_ERROR;
  const int next_start = usearch_next(search_, &status);
  const bool found = U_SUCCESS(status) && next_start != USEARCH_DONE;
  if (!found)
    return false;
  DCHECK(U_SUCCESS(status));
  match_index = next_start;
  match_length = usearch_getMatchedLength(search_);
  return true;
}

}  // namespace i18n
}  // namespace base
