blob: f2a781f2c3e911576637b2b747320d85025823e4 [file] [log] [blame]
# Copyright 2023 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""SBOM generator.
This tool takes input from several sources and weaves together an SBOM.
Inputs:
- the output of packages_used. This is a JSON block of license, package_info
and other declarations, plus a list of all remote packages referenced.
- the maven lock file (maven_install.json)
- FUTURE: other package lock files
- FUTURE: a user provided override of package URL to corrected information
This tool is private to the sbom() rule.
"""
import argparse
import datetime
import hashlib
import json
# pylint: disable=g-bare-generic
def create_sbom(package_info: dict, maven_packages: dict) -> dict:
    """Creates a dict representing an SBOM.

    Args:
      package_info: dict of data from packages_used output. Reads the
        "top_level_target" entry (used as the document name) and the
        "packages" entry (an iterable of package label strings).
      maven_packages: packages gleaned from Maven lock file, keyed by the
        Bazel-style package name; each value must carry a "url" entry.
        May be None or empty, in which case no download locations can be
        resolved and every package is reported as missing.

    Returns:
      dict of SBOM data
    """
    # main() passes None when no --maven_install file is supplied. Treat that
    # as "no known Maven packages" instead of failing on None.get(...).
    maven_packages = maven_packages or {}

    now = datetime.datetime.now(datetime.timezone.utc)
    ret = {
        "spdxVersion": "SPDX-2.3",
        "dataLicense": "CC0-1.0",
        "SPDXID": "SPDXRef-DOCUMENT",
        "documentNamespace": (
            "https://spdx.google/be852459-4c54-4c50-9d2f-0e48890418fc"
        ),
        "name": package_info["top_level_target"],
        "creationInfo": {
            "licenseListVersion": "",
            "creators": [
                "Tool: github.com/bazelbuild/bazel/tools/compliance/write_sbom",
                "Organization: Google LLC",
            ],
            "created": now.isoformat(),
        },
    }

    packages = []
    relationships = [
        {
            "spdxElementId": "SPDXRef-DOCUMENT",
            "relatedSpdxElement": "SPDXRef-Package-main",
            "relationshipType": "DESCRIBES",
        }
    ]

    maven_prefix = "@maven//:"
    # This is bazel private shenanigans.
    magic_file_suffix = "//file:file"

    for pkg in package_info["packages"]:
        # The SPDX id is derived from the package label so that it is stable
        # from run to run.
        spdxid = "SPDXRef-GooglePackage-%s" % (
            hashlib.md5(pkg.encode("utf-8")).hexdigest()
        )
        pi = {
            "name": pkg,
            "downloadLocation": "NOASSERTION",
            "SPDXID": spdxid,
            # TODO(aiuto): Fill in the rest
            # "supplier": "Organization: Google LLC",
            # "licenseConcluded": "License-XXXXXX",
            # "copyrightText": ""
        }

        have_maven = None
        if pkg.startswith(maven_prefix):
            have_maven = maven_packages.get(pkg[len(maven_prefix):])
        elif pkg.endswith(magic_file_suffix):
            # Bazel hacks jvm_external to add //file:file as a target, then we
            # depend on that rather than the correct thing.
            # Example: @org_apache_tomcat_tomcat_annotations_api_8_0_5//file:file
            # Check for just the versioned root (drop the leading "@" and the
            # trailing suffix).
            have_maven = maven_packages.get(pkg[1 : -len(magic_file_suffix)])

        if have_maven:
            pi["downloadLocation"] = have_maven["url"]
        else:
            # TODO(aiuto): Do something better for this case.
            print("MISSING ", pkg)

        packages.append(pi)
        relationships.append({
            "spdxElementId": "SPDXRef-Package-main",
            "relatedSpdxElement": spdxid,
            "relationshipType": "CONTAINS",
        })

    ret["packages"] = packages
    ret["relationships"] = relationships
    return ret
def maven_to_bazel(s):
    """Maps Maven coordinate separators to the underscore used in Bazel names.

    Every '.', '-' and ':' in the input becomes '_'; all other characters are
    left as they are.

    Args:
      s: a string

    Returns:
      a string
    """
    separators_to_underscore = str.maketrans(".-:", "___")
    return s.translate(separators_to_underscore)
# pylint: disable=g-bare-generic
def maven_install_to_packages(maven_install: dict) -> dict:
    """Convert raw maven lock file into a dict keyed by bazel package names.

    Args:
      maven_install: raw maven lock file data. Reads the "repositories" table
        (download URL prefix -> list of coordinates) and the "artifacts"
        table (coordinate -> info dict with "version" and "shasums" entries).

    Returns:
      dict keyed by names created by rules_jvm_external. Each value is a
      shallow copy of the artifact's info with "maven_name" and "url" added.

    Raises:
      KeyError: if an artifact (or one of its classifier variants) is not
        listed under any entry of "repositories".
      ValueError: if an artifact name does not split into exactly two parts
        on ":".
    """
    # Map repo coordinate back to the download repository.
    # The input dict is of the form
    #   "https://repo1.maven.org/": [ com.google.foo:some.package, ...]
    # But.... sometimes the artifact is
    #   com.google.foo:some.package:jar:arch
    # and then that means the artifact table has an entry
    # in their shasums table keyed by arch.
    repo_to_url = {}
    for url, repos in maven_install["repositories"].items():
        for repo in repos:
            if repo in repo_to_url:
                # First repository listed wins; later duplicates are ignored.
                print(
                    "WARNING: Duplicate download path for <%s>. Using %s"
                    % (repo, repo_to_url[repo])
                )
                continue
            repo_to_url[repo] = url

    ret = {}
    for name, info in maven_install["artifacts"].items():
        repo, artifact = name.split(":")
        version = info["version"]
        # The group id becomes a directory path; it is the same for every
        # classifier of this artifact.
        repo_path = repo.replace(".", "/")

        for arch in info["shasums"]:
            # build the download URL
            sub_version = version
            repo_name = name
            if arch != "jar":
                sub_version = version + "-" + arch
                repo_name = "%s:jar:%s" % (name, arch)

            # The directory is named after the plain version, but the file
            # name carries the classifier (artifact-version-classifier.jar).
            # Using the plain version here would point every classifier at
            # the main jar.
            url = (
                "{mirror}{repo}/{artifact}/{version}/"
                "{artifact}-{file_version}.jar"
            ).format(
                mirror=repo_to_url[repo_name],
                repo=repo_path,
                artifact=artifact,
                version=version,
                file_version=sub_version,
            )
            tmp = info.copy()
            tmp["maven_name"] = name
            tmp["url"] = url
            bazel_name = maven_to_bazel(name) + "_" + maven_to_bazel(sub_version)
            ret[bazel_name] = tmp
    return ret
def main() -> None:
    """Parses the command line, builds the SBOM and writes it out as JSON."""
    arg_parser = argparse.ArgumentParser(
        description="Helper for creating SBOMs", fromfile_prefix_chars="@"
    )
    arg_parser.add_argument(
        "--out", required=True, help="The output file, mandatory."
    )
    arg_parser.add_argument(
        "--packages_used",
        required=True,
        help="JSON list of transitive package data for a target",
    )
    arg_parser.add_argument(
        "--maven_install",
        required=False,
        default="",
        help="Maven lock file",
    )
    flags = arg_parser.parse_args()

    with open(flags.packages_used, "rt", encoding="utf-8") as packages_file:
        package_info = json.load(packages_file)

    # The Maven lock file is optional; without it no Maven data is passed on.
    if flags.maven_install:
        with open(flags.maven_install, "rt", encoding="utf-8") as lock_file:
            lock_data = json.load(lock_file)
            maven_packages = maven_install_to_packages(lock_data)
            # Useful for debugging
            # print(json.dumps(maven_packages, indent=2))
    else:
        maven_packages = None

    sbom = create_sbom(package_info, maven_packages)
    with open(flags.out, "w", encoding="utf-8") as sbom_file:
        serialized = json.dumps(sbom, indent=2)
        sbom_file.write(serialized)
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()