# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Python module to find feature names in source code.

These functions are declared in a separate module to allow multiprocessing to
correctly unpickle the called functions again.
"""

import glob
import itertools
import multiprocessing
import pathlib
import re

# Matches a BASE_FEATURE(<arg1>, <arg2>, <arg3>); invocation and captures its
# three comma-separated arguments (non-greedily) as bytes.
BASE_FEATURE_PATTERN = br"BASE_FEATURE\((.*?),(.*?),(.*?)\);"
# DOTALL lets "." match newlines, so an invocation may span several lines.
BASE_FEATURE_RE = re.compile(
    BASE_FEATURE_PATTERN,
    flags=re.MULTILINE | re.DOTALL,
)

# Only these top-level directories (direct children of the repository root)
# are searched for BASE_FEATURE declarations; FindDeclaredFeatures matches
# each root entry's name against this list. If your feature is declared
# outside these root directories, then add the directory here.
DIRECTORIES_TO_SEARCH = [
    "android_webview",
    "apps",
    "ash",
    "base",
    "cc",
    "chrome",
    "chromecast",
    "chromeos",
    "clank",
    "components",
    "content",
    "courgette",
    "crypto",
    "dbus",
    "device",
    "extensions",
    "fuchsia_web",
    "gin",
    "google_apis",
    "google_update",
    "gpu",
    "headless",
    "infra",
    "internal",
    "ios",
    "ipc",
    "media",
    "mojo",
    "native_client",
    "native_client_sdk",
    "net",
    "pdf",
    "ppapi",
    "printing",
    "remoting",
    "rlz",
    "sandbox",
    "services",
    "skia",
    "sql",
    "storage",
    # third_party/blink handled separately in FindDeclaredFeatures
    "ui",
    "url",
    "v8",
    "webkit",
    "weblayer",
]

def _FindFeaturesInFile(filepath):
  """Returns the feature names declared with BASE_FEATURE in one file.

  Args:
    filepath: Path of the file to scan.
  Returns:
    List of feature names (str), one per BASE_FEATURE match, in the order
    the matches occur in the file.
  """
  # Work on bytes to avoid utf-8 decode errors outside feature declarations;
  # only the extracted names are decoded.
  source_bytes = pathlib.Path(filepath).read_bytes()

  feature_names = []
  for declaration in BASE_FEATURE_RE.finditer(source_bytes):
    # The second macro argument is the feature name. Drop the whitespace
    # around it first, then the surrounding double quotes.
    raw_name = declaration.group(2)
    unquoted_name = raw_name.strip().strip(b'"')
    feature_names.append(unquoted_name.decode("utf-8"))
  return feature_names


def FindDeclaredFeatures(input_api):
  """Finds all declared feature names in the source code.

  Scans *.cc files below the top-level directories listed in
  DIRECTORIES_TO_SEARCH and below third_party/blink, plus *.mm files below
  ios, for features defined with the BASE_FEATURE macro and extracts the
  feature names.

  Args:
    input_api: InputApi instance; input_api.change.RepositoryRoot() is used
      to locate the root of the source tree.
  Returns:
    Set of defined feature names in the source tree.
  """
  root = pathlib.Path(input_api.change.RepositoryRoot())

  # Features are supposed to be defined in .cc files.
  # Iterate over the search folders in the root.
  glob_patterns = [
      str(p / "**/*.cc") for p in root.iterdir()
      if p.is_dir() and p.name in DIRECTORIES_TO_SEARCH
  ]

  # blink is the only directory in third_party that should be searched.
  glob_patterns.append(str(root / "third_party/blink/**/*.cc"))

  # Additional features for iOS can be found in mm files in the ios directory.
  glob_patterns.append(str(root / "ios/**/*.mm"))

  # Chain the glob iterators so the files to search are produced lazily.
  files_to_search = itertools.chain.from_iterable(
      glob.iglob(pattern, recursive=True) for pattern in glob_patterns)

  feature_names = set()
  # Limit to 4 processes - disk access becomes a bottleneck with just a few
  # processes, but splitting the searching across multiple CPUs does yield a
  # benefit of a few seconds.
  # The exact batch size does not seem to matter much, as long as it is >> 1.
  # The context manager tears the workers down even if the scan fails or is
  # interrupted. Results must be consumed inside the block, because leaving
  # it terminates the pool.
  with multiprocessing.Pool(4) as pool:
    for feature_list in pool.imap_unordered(_FindFeaturesInFile,
                                            files_to_search, 1000):
      feature_names.update(feature_list)
  return feature_names
