blob: 38fd01def05acf5c89ade66452163333daf95307 [file] [log] [blame]
# Copyright 2015 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This package manipulates Docker image layer metadata."""
from collections import namedtuple
import copy
import json
import os
import os.path
import sys
from tools.build_defs.docker import utils
from third_party.py import gflags
gflags.DEFINE_string(
'name', None, 'The name of the current layer')
gflags.DEFINE_string(
'base', None, 'The parent image')
gflags.DEFINE_string(
'output', None, 'The output file to generate')
gflags.DEFINE_string(
'layer', None, 'The current layer tar')
gflags.DEFINE_list(
'entrypoint', None,
'Override the "Entrypoint" of the previous layer')
gflags.DEFINE_list(
'command', None,
'Override the "Cmd" of the previous layer')
gflags.DEFINE_string(
'user', None, 'The username to run commands under')
gflags.DEFINE_list('labels', None, 'Augment the "Label" of the previous layer')
gflags.DEFINE_list(
'ports', None,
'Augment the "ExposedPorts" of the previous layer')
gflags.DEFINE_list(
'volumes', None,
'Augment the "Volumes" of the previous layer')
gflags.DEFINE_string(
'workdir', None,
'Set the working directory for the layer')
gflags.DEFINE_list(
'env', None,
'Augment the "Env" of the previous layer')
FLAGS = gflags.FLAGS
_MetadataOptionsT = namedtuple('MetadataOptionsT',
['name', 'parent', 'size', 'entrypoint', 'cmd',
'env', 'labels', 'ports', 'volumes', 'workdir',
'user'])
class MetadataOptions(_MetadataOptionsT):
"""Docker image layer metadata options."""
def __new__(cls,
name=None,
parent=None,
size=None,
entrypoint=None,
cmd=None,
user=None,
labels=None,
env=None,
ports=None,
volumes=None,
workdir=None):
"""Constructor."""
return super(MetadataOptions, cls).__new__(cls,
name=name,
parent=parent,
size=size,
entrypoint=entrypoint,
cmd=cmd,
user=user,
labels=labels,
env=env,
ports=ports,
volumes=volumes,
workdir=workdir)
_DOCKER_VERSION = '1.5.0'
_PROCESSOR_ARCHITECTURE = 'amd64'
_OPERATING_SYSTEM = 'linux'
def Resolve(value, environment):
"""Resolves environment variables embedded in the given value."""
outer_env = os.environ
try:
os.environ = environment
return os.path.expandvars(value)
finally:
os.environ = outer_env
def DeepCopySkipNull(data):
"""Do a deep copy, skipping null entry."""
if type(data) == type(dict()):
return dict((DeepCopySkipNull(k), DeepCopySkipNull(v))
for k, v in data.iteritems() if v is not None)
return copy.deepcopy(data)
def KeyValueToDict(pair):
"""Converts an iterable object of key=value pairs to dictionary."""
d = dict()
for kv in pair:
(k, v) = kv.split('=', 1)
d[k] = v
return d
def RewriteMetadata(data, options):
"""Rewrite and return a copy of the input data according to options.
Args:
data: The dict of Docker image layer metadata we're copying and rewriting.
options: The changes this layer makes to the overall image's metadata, which
first appears in this layer's version of the metadata
Returns:
A deep copy of data, which has been updated to reflect the metadata
additions of this layer.
Raises:
Exception: a required option was missing.
"""
output = DeepCopySkipNull(data)
if not options.name:
raise Exception('Missing required option: name')
output['id'] = options.name
if options.parent:
output['parent'] = options.parent
elif data:
raise Exception('Expected empty input object when parent is omitted')
if options.size:
output['Size'] = options.size
elif 'Size' in output:
del output['Size']
if 'config' not in output:
output['config'] = {}
if options.entrypoint:
output['config']['Entrypoint'] = options.entrypoint
if options.cmd:
output['config']['Cmd'] = options.cmd
if options.user:
output['config']['User'] = options.user
output['docker_version'] = _DOCKER_VERSION
output['architecture'] = _PROCESSOR_ARCHITECTURE
output['os'] = _OPERATING_SYSTEM
def Dict2ConfigValue(d):
return ['%s=%s' % (k, d[k]) for k in sorted(d.keys())]
if options.env:
# Build a dictionary of existing environment variables (used by Resolve).
environ_dict = KeyValueToDict(output['config'].get('Env', []))
# Merge in new environment variables, resolving references.
for k, v in options.env.iteritems():
# Resolve handles scenarios like "PATH=$PATH:...".
environ_dict[k] = Resolve(v, environ_dict)
output['config']['Env'] = Dict2ConfigValue(environ_dict)
if options.labels:
label_dict = KeyValueToDict(output['config'].get('Label', []))
for k, v in options.labels.iteritems():
label_dict[k] = v
output['config']['Label'] = Dict2ConfigValue(label_dict)
if options.ports:
if 'ExposedPorts' not in output['config']:
output['config']['ExposedPorts'] = {}
for p in options.ports:
if '/' in p:
# The port spec has the form 80/tcp, 1234/udp
# so we simply use it as the key.
output['config']['ExposedPorts'][p] = {}
else:
# Assume tcp
output['config']['ExposedPorts'][p + '/tcp'] = {}
if options.volumes:
if 'Volumes' not in output['config']:
output['config']['Volumes'] = {}
for p in options.volumes:
output['config']['Volumes'][p] = {}
if options.workdir:
output['config']['WorkingDir'] = options.workdir
# TODO(mattmoor): comment, created, container_config
# container_config contains information about the container
# that was used to create this layer, so it shouldn't
# propagate from the parent to child. This is where we would
# annotate information that can be extract by tools like Blubber
# or Quay.io's UI to gain insight into the source that generated
# the layer. A Dockerfile might produce something like:
# # (nop) /bin/sh -c "apt-get update"
# We might consider encoding the fully-qualified bazel build target:
# //tools/build_defs/docker:image
# However, we should be sensitive to leaking data through this field.
if 'container_config' in output:
del output['container_config']
return output
def GetParentIdentifier(f):
"""Try to look at the parent identifier from a docker image.
The identifier is expected to be in the 'top' file for our rule so we look at
it first ('./top', 'top'). If it's not found, then we use the 'repositories'
file and tries to parse it to get the first declared repository (so we can
actually parse a file generated by 'docker save').
Args:
f: the input tar file.
Returns:
The identifier of the docker image, or None if no identifier was found.
"""
# TODO(dmarting): Maybe we could drop the 'top' file all together?
top = utils.GetTarFile(f, 'top')
if top:
return top.strip()
repositories = utils.GetTarFile(f, 'repositories')
if repositories:
data = json.loads(repositories)
for k1 in data:
for k2 in data[k1]:
# Returns the first found key
return data[k1][k2].strip()
return None
def main(unused_argv):
parent = ''
base_json = '{}'
if FLAGS.base:
parent = GetParentIdentifier(FLAGS.base)
if parent:
base_json = utils.GetTarFile(FLAGS.base, '%s/json' % parent)
data = json.loads(base_json)
name = FLAGS.name
if name.startswith('@'):
with open(name[1:], 'r') as f:
name = f.read()
labels = KeyValueToDict(FLAGS.labels)
for label, value in labels.iteritems():
if value.startswith('@'):
with open(value[1:], 'r') as f:
labels[label] = f.read()
output = RewriteMetadata(data,
MetadataOptions(name=name,
parent=parent,
size=os.path.getsize(FLAGS.layer),
entrypoint=FLAGS.entrypoint,
cmd=FLAGS.command,
user=FLAGS.user,
labels=labels,
env=KeyValueToDict(FLAGS.env),
ports=FLAGS.ports,
volumes=FLAGS.volumes,
workdir=FLAGS.workdir))
with open(FLAGS.output, 'w') as fp:
json.dump(output, fp, sort_keys=True)
fp.write('\n')
if __name__ == '__main__':
main(FLAGS(sys.argv))