| # Copyright 2023 The Bazel Authors. All rights reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # https://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| """SBOM generator. |
| |
| This tool takes input from several sources and weaves together an SBOM. |
| |
| Inputs: |
| - the output of packages_used. This is a JSON block of license, package_info |
| and other declarations, plus a list of all remote packages referenced. |
| - the maven lock file (maven_install.json) |
| - FUTURE: other package lock files |
| - FUTURE: a user provided override of package URL to corrected information |
| |
| This tool is private to the sbom() rule. |
| """ |
| |
| import argparse |
| import datetime |
| import hashlib |
| import json |
| |
| |
| # pylint: disable=g-bare-generic |
def create_sbom(package_info: dict, maven_packages: dict) -> dict:
    """Creates a dict representing an SPDX 2.3 SBOM.

    Every entry of package_info["packages"] becomes one SPDX package whose
    SPDXID is derived from the MD5 digest of its Bazel label, together with a
    CONTAINS relationship from "SPDXRef-Package-main". When the label can be
    matched against maven_packages the package's downloadLocation is the
    recorded Maven URL; otherwise it stays "NOASSERTION" and a "MISSING" line
    is printed.

    Args:
        package_info: dict of data from packages_used output. Must provide
            "top_level_target" (used as the document name) and "packages"
            (an iterable of Bazel label strings).
        maven_packages: packages gleaned from the Maven lock file; each value
            must carry a "url" entry. May be None or empty when no lock file
            was supplied, in which case no label is resolved.

    Returns:
        dict of SBOM data
    """
    # Without a lock file nothing can be resolved; treat that as an empty
    # table instead of failing on the first Maven label.
    maven_packages = maven_packages or {}

    # SPDX 2.3 requires "created" as YYYY-MM-DDThh:mm:ssZ: UTC, no fractional
    # seconds and no numeric offset, which isoformat() would emit.
    now = datetime.datetime.now(datetime.timezone.utc)
    created = now.strftime("%Y-%m-%dT%H:%M:%SZ")

    ret = {
        "spdxVersion": "SPDX-2.3",
        "dataLicense": "CC0-1.0",
        "SPDXID": "SPDXRef-DOCUMENT",
        "documentNamespace": (
            "https://spdx.google/be852459-4c54-4c50-9d2f-0e48890418fc"
        ),
        "name": package_info["top_level_target"],
        "creationInfo": {
            "licenseListVersion": "",
            "creators": [
                "Tool: github.com/bazelbuild/bazel/tools/compliance/write_sbom",
                "Organization: Google LLC",
            ],
            "created": created,
        },
    }

    packages = []
    relationships = [{
        "spdxElementId": "SPDXRef-DOCUMENT",
        "relatedSpdxElement": "SPDXRef-Package-main",
        "relationshipType": "DESCRIBES",
    }]

    maven_prefix = "@maven//:"
    # This is bazel private shenanigans.
    magic_file_suffix = "//file:file"

    for pkg in package_info["packages"]:
        spdxid = "SPDXRef-GooglePackage-%s" % (
            hashlib.md5(pkg.encode("utf-8")).hexdigest()
        )
        pi = {
            "name": pkg,
            "downloadLocation": "NOASSERTION",
            "SPDXID": spdxid,
            # TODO(aiuto): Fill in the rest
            # "supplier": "Organization: Google LLC",
            # "licenseConcluded": "License-XXXXXX",
            # "copyrightText": ""
        }

        have_maven = None
        if pkg.startswith(maven_prefix):
            have_maven = maven_packages.get(pkg[len(maven_prefix):])
        elif pkg.endswith(magic_file_suffix):
            # Bazel hacks jvm_external to add //file:file as a target, then we
            # depend on that rather than the correct thing.
            # Example: @org_apache_tomcat_tomcat_annotations_api_8_0_5//file:file
            # Check for just the versioned root: drop the leading "@" and the
            # trailing "//file:file".
            have_maven = maven_packages.get(pkg[1:-len(magic_file_suffix)])

        if have_maven:
            pi["downloadLocation"] = have_maven["url"]
        else:
            # TODO(aiuto): Do something better for this case.
            print("MISSING ", pkg)

        packages.append(pi)
        relationships.append({
            "spdxElementId": "SPDXRef-Package-main",
            "relatedSpdxElement": spdxid,
            "relationshipType": "CONTAINS",
        })

    ret["packages"] = packages
    ret["relationships"] = relationships
    return ret
| |
| |
def maven_to_bazel(s):
    """Maps Maven coordinate separators onto the character Bazel names use.

    Every '.', '-' and ':' in the input becomes '_'; all other characters are
    left untouched.

    Args:
        s: a string

    Returns:
        a string
    """
    separators_to_underscore = str.maketrans(".-:", "___")
    return s.translate(separators_to_underscore)
| |
| |
| # pylint: disable=g-bare-generic |
def maven_install_to_packages(maven_install: dict) -> dict:
    """Convert raw maven lock file into a dict keyed by bazel package names.

    For every artifact, one entry is produced per key of its "shasums" table.
    The key "jar" stands for the plain artifact; any other key is treated as a
    classifier. Each entry is a shallow copy of the artifact's lock file data
    with two fields added: "maven_name" (the "group:artifact" coordinate) and
    "url" (the download URL on the repository that lists the coordinate).

    Args:
        maven_install: raw maven lock file data. Must provide "repositories"
            (download URL prefix -> list of coordinates) and "artifacts"
            ("group:artifact" -> dict with "version" and "shasums").

    Returns:
        dict keyed by names created by rules_jvm_external

    Raises:
        ValueError: if an artifact name is not of the form "group:artifact".
    """
    # Map each repository coordinate back to the download repository.
    # The input dict is of the form
    #   "https://repo1.maven.org/": ["com.google.foo:some.package", ...]
    # An artifact with a classifier is listed there as
    #   com.google.foo:some.package:jar:<classifier>
    # and the artifact table then has that classifier as a key of the
    # artifact's shasums table.
    repo_to_url = {}
    for url, repos in maven_install["repositories"].items():
        for repo in repos:
            if repo in repo_to_url:
                # First repository wins; later duplicates are only reported.
                print(
                    "WARNING: Duplicate download path for <%s>. Using %s"
                    % (repo, repo_to_url[repo])
                )
                continue
            repo_to_url[repo] = url

    ret = {}
    for name, info in maven_install["artifacts"].items():
        repo, artifact = name.split(":")
        version = info["version"]
        group_path = repo.replace(".", "/")

        for arch in info["shasums"]:
            if arch == "jar":
                sub_version = version
                repo_name = name
            else:
                sub_version = version + "-" + arch
                repo_name = "%s:jar:%s" % (name, arch)

            mirror = repo_to_url.get(repo_name)
            if mirror is None:
                # Stay best-effort: one unlisted coordinate should not abort
                # SBOM generation for everything else.
                print(
                    "WARNING: No download repository listed for <%s>"
                    % repo_name
                )
                continue

            # Maven repository layout:
            #   <group path>/<artifact>/<version>/<artifact>-<version>[-<classifier>].jar
            # The directory always uses the plain version; only the file name
            # carries the classifier.
            url = (
                "{mirror}{repo}/{artifact}/{version}/"
                "{artifact}-{file_version}.jar"
            ).format(
                mirror=mirror,
                repo=group_path,
                artifact=artifact,
                version=version,
                file_version=sub_version,
            )

            tmp = info.copy()
            tmp["maven_name"] = name
            tmp["url"] = url
            bazel_name = (
                maven_to_bazel(name) + "_" + maven_to_bazel(sub_version)
            )
            ret[bazel_name] = tmp
    return ret
| |
| |
def main() -> None:
    """Command line entry point: reads the inputs and writes the SBOM as JSON.

    Flags (arguments may also be read from a file given as @path):
        --out: path of the JSON file to write. Required.
        --packages_used: path of the JSON file with the transitive package
            data for a target. Required.
        --maven_install: path of the Maven lock file. Optional; without it no
            package is resolved to a Maven download URL.
    """
    parser = argparse.ArgumentParser(
        description="Helper for creating SBOMs", fromfile_prefix_chars="@"
    )
    parser.add_argument(
        "--out", required=True, help="The output file, mandatory."
    )
    parser.add_argument(
        "--packages_used",
        required=True,
        help="JSON list of transitive package data for a target",
    )
    parser.add_argument(
        "--maven_install",
        required=False,
        default="",
        help="Maven lock file",
    )
    opts = parser.parse_args()

    with open(opts.packages_used, "rt", encoding="utf-8") as inp:
        package_info = json.load(inp)

    # Start from an empty table so create_sbom always receives a dict, even
    # when no lock file was given.
    maven_packages = {}
    if opts.maven_install:
        with open(opts.maven_install, "rt", encoding="utf-8") as inp:
            maven_install = json.load(inp)
        maven_packages = maven_install_to_packages(maven_install)
        # Useful for debugging
        # print(json.dumps(maven_packages, indent=2))

    sbom = create_sbom(package_info, maven_packages)
    with open(opts.out, "w", encoding="utf-8") as out:
        json.dump(sbom, out, indent=2)


if __name__ == "__main__":
    main()