# Copyright 2024 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Parse SimpleAPI HTML in Starlark.
"""

def parse_simpleapi_html(*, url, content):
    """Get the package URLs for given shas by parsing the Simple API HTML.

    Args:
        url(str): The URL that the HTML content can be downloaded from.
        content(str): The Simple API HTML content.

    Returns:
        A struct with two dicts, both keyed by the sha256 of the artifact:
        * sdists: The distributions whose filename does not end with `.whl`.
        * whls: The distributions whose filename ends with `.whl`.

        Each dict value is a struct with:
        * filename: The filename of the artifact.
        * url: The absolute URL to download the artifact.
        * sha256: The sha256 of the artifact.
        * metadata_sha256: The whl METADATA sha256 if we can download it. If this is
          present, then the 'metadata_url' is also present. Defaults to "" and is
          always "" for sdists.
        * metadata_url: The URL for the METADATA if we can download it. Defaults to ""
          and is always "" for sdists.
        * yanked: True if the link is marked with the `data-yanked` attribute.
    """
    sdists = {}
    whls = {}

    # Everything before the first anchor is the page header; every following
    # chunk starts with the value of one `href` attribute.
    lines = content.split("<a href=\"")

    _, _, api_version = lines[0].partition("name=\"pypi:repository-version\" content=\"")
    api_version, _, _ = api_version.partition("\"")

    # We must assume the 1.0 if it is not present
    # See https://packaging.python.org/en/latest/specifications/simple-repository-api/#clients
    api_version = api_version or "1.0"
    api_version = tuple([int(i) for i in api_version.split(".")])

    if api_version >= (2, 0):
        # We don't expect to have version 2.0 here, but have this check in place just in case.
        # https://packaging.python.org/en/latest/specifications/simple-repository-api/#versioning-pypi-s-simple-api
        fail("Unsupported API version: {}".format(api_version))

    # Each line follows the following pattern
    # <a href="https://...#sha256=..." attribute1="foo" ... attributeN="bar">filename</a><br />
    for line in lines[1:]:
        dist_url, _, tail = line.partition("#sha256=")
        sha256, _, tail = tail.partition("\"")
        if not sha256:
            # A link without a sha256 fragment cannot be looked up by hash, and
            # `dist_url` would hold the whole HTML remainder, so skip it.
            continue

        # See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api
        yanked = "data-yanked" in line

        # What is left is `<attributes>>filename</a>...`; the text after the last
        # ">" that precedes the closing tag is the filename.
        head, _, _ = tail.rpartition("</a>")
        maybe_metadata, _, filename = head.rpartition(">")

        metadata_sha256 = ""
        metadata_url = ""
        for metadata_marker in ["data-core-metadata", "data-dist-info-metadata"]:
            metadata_marker = metadata_marker + "=\"sha256="
            if metadata_marker in maybe_metadata:
                # Implement https://peps.python.org/pep-0714/
                _, _, tail = maybe_metadata.partition(metadata_marker)
                metadata_sha256, _, _ = tail.partition("\"")
                metadata_url = dist_url + ".metadata"
                break

        if filename.endswith(".whl"):
            whls[sha256] = struct(
                filename = filename,
                url = _absolute_url(url, dist_url),
                sha256 = sha256,
                metadata_sha256 = metadata_sha256,
                metadata_url = _absolute_url(url, metadata_url) if metadata_url else "",
                yanked = yanked,
            )
        else:
            sdists[sha256] = struct(
                filename = filename,
                url = _absolute_url(url, dist_url),
                sha256 = sha256,
                metadata_sha256 = "",
                metadata_url = "",
                yanked = yanked,
            )

    return struct(
        sdists = sdists,
        whls = whls,
    )

def _get_root_directory(url):
    """Returns the `scheme://host` prefix of the given URL.

    Args:
        url(str): A URL that contains a `://` scheme separator.

    Returns:
        The scheme and host of the URL without any path and without a trailing slash.
        Fails if the URL has no `://`.
    """
    scheme_end = url.find("://")
    if scheme_end == -1:
        fail("Invalid URL format")

    scheme = url[:scheme_end]
    host_end = url.find("/", scheme_end + 3)
    if host_end == -1:
        # There is no path component, the host runs to the end of the URL.
        host_end = len(url)
    host = url[scheme_end + 3:host_end]

    return "{}://{}".format(scheme, host)

def _is_downloadable(url):
    """Checks if the URL would be accepted by the Bazel downloader.

    This is based on Bazel's HttpUtils::isUrlSupportedByDownloader
    """
    return url.startswith("http://") or url.startswith("https://") or url.startswith("file://")

def _absolute_url(index_url, candidate):
    """Resolves a link found in the index against the URL of the index.

    Args:
        index_url(str): The URL of the index page, treated as a directory.
        candidate(str): The link as found in the `href` attribute.

    Returns:
        "" for an empty candidate, the candidate itself if it already is a
        downloadable URL, otherwise the candidate resolved against `index_url`.
    """
    if candidate == "":
        return candidate

    if _is_downloadable(candidate):
        return candidate

    if candidate.startswith("/"):
        # absolute path
        root_directory = _get_root_directory(index_url)
        return "{}{}".format(root_directory, candidate)

    if candidate.startswith(".."):
        # relative path with up references
        #
        # Only the leading ".." path segments climb up from the index URL. A
        # ".." that is part of a file name (e.g. "foo..bar.tar.gz") or appears
        # later in the path must be kept as is.
        segments = candidate.split("/")
        up_levels = 0
        for segment in segments:
            if segment != "..":
                break
            up_levels += 1

        remainder = "/".join(segments[up_levels:])
        for _ in range(up_levels):
            index_url, _, _ = index_url.rstrip("/").rpartition("/")

        return "{}/{}".format(index_url, remainder.strip("/"))

    # relative path without up-references; drop the trailing slash of the index
    # URL so that the result does not contain "//" in the path.
    return "{}/{}".format(index_url.rstrip("/"), candidate)