Add support for a dedicated pipeline machine and improve the infra scripts.
diff --git a/buildkite/create_images.py b/buildkite/create_images.py
index 685747c..087db36 100755
--- a/buildkite/create_images.py
+++ b/buildkite/create_images.py
@@ -83,14 +83,19 @@
'https://www.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx'
]
},
- # 'buildkite-windows2016': {
- # 'source_image_project': 'windows-cloud',
- # 'source_image_family': 'windows-2016',
- # 'target_image_family': 'buildkite-windows2016',
- # 'scripts': [
- # 'setup-windows2016.ps1'
- # ]
- # }
+ 'buildkite-pipeline-ubuntu1604': {
+ 'source_image_project': 'ubuntu-os-cloud',
+ 'source_image_family': 'ubuntu-1604-lts',
+ 'target_image_family': 'buildkite-pipeline-ubuntu1604',
+ 'scripts': [
+ 'shell-utils.sh',
+ 'setup-ubuntu.sh',
+ 'install-azul-zulu.sh',
+ 'install-buildkite-agent.sh',
+ 'install-python36.sh',
+ 'shutdown.sh'
+ ]
+ },
'buildkite-windows': {
'source_image_project': 'windows-cloud',
'source_image_family': 'windows-1709-core',
diff --git a/buildkite/create_instance_groups.py b/buildkite/create_instance_groups.py
deleted file mode 100755
index 423b340..0000000
--- a/buildkite/create_instance_groups.py
+++ /dev/null
@@ -1,170 +0,0 @@
-#!/usr/bin/env python3
-#
-# Copyright 2018 The Bazel Authors. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import queue
-import re
-import subprocess
-import sys
-import threading
-
-DEBUG = True
-
-LOCATION = 'europe-west1-d'
-
-AGENTS = {
- 'buildkite-ubuntu1404': {
- 'count': 8,
- 'startup_script': 'startup-ubuntu.sh',
- 'machine_type': 'n1-standard-32',
- 'local_ssd': 'interface=nvme',
- },
- 'buildkite-ubuntu1604': {
- 'count': 8,
- 'startup_script': 'startup-ubuntu.sh',
- 'machine_type': 'n1-standard-32',
- 'local_ssd': 'interface=nvme',
- },
- 'buildkite-windows': {
- 'count': 4,
- 'startup_script': 'startup-windows.ps1',
- 'machine_type': 'n1-standard-32',
- 'local_ssd': 'interface=scsi',
- },
- 'buildkite-freebsd11': {
- 'count': 2,
- 'startup_script': 'startup-ubuntu.sh',
- 'machine_type': 'n1-standard-32',
- 'local_ssd': 'interface=scsi',
- }
-}
-
-PRINT_LOCK = threading.Lock()
-WORK_QUEUE = queue.Queue()
-
-
-def debug(*args, **kwargs):
- if DEBUG:
- print(*args, **kwargs)
-
-
-def run(args, **kwargs):
- debug('Running: %s' % ' '.join(args))
- return subprocess.run(args, **kwargs)
-
-
-def delete_template(template_name):
- result = run(['gcloud', 'compute', 'instance-templates', 'delete', template_name, '--quiet'], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
- if result.returncode != 0:
- # It's not an error if 'delete' failed, because the template didn't exist in the first place.
- # But we do want to error out on other unexpected errors.
- if not re.search(r'The resource .* was not found', result.stdout):
- raise Exception('"gcloud compute instance-templates delete" returned unexpected error:\n%s' % result.stdout)
- return result
-
-
-def create_template(template_name, image_family, params):
- cmd = ['gcloud', 'compute', 'instance-templates', 'create', template_name]
- cmd.extend(['--machine-type', params['machine_type']])
- cmd.extend(['--network', 'buildkite'])
- if 'windows' in image_family:
- cmd.extend(['--metadata-from-file', 'windows-startup-script-ps1=' + params['startup_script']])
- else:
- cmd.extend(['--metadata-from-file', 'startup-script=' + params['startup_script']])
- cmd.extend(['--min-cpu-platform', 'Intel Skylake'])
- cmd.extend(['--boot-disk-type', 'pd-ssd'])
- cmd.extend(['--boot-disk-size', '50GB'])
- if 'local_ssd' in params:
- cmd.extend(['--local-ssd', params['local_ssd']])
- cmd.extend(['--image-project', 'bazel-public'])
- cmd.extend(['--image-family', image_family])
- cmd.extend(['--service-account', 'remote-account@bazel-public.iam.gserviceaccount.com'])
- cmd.extend(['--scopes', 'cloud-platform'])
- run(cmd)
-
-
-def delete_instance_group(group_name):
- result = run(['gcloud', 'compute', 'instance-groups', 'managed', 'delete', group_name, '--zone', LOCATION, '--quiet'], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
- if result.returncode != 0:
- # It's not an error if 'delete' failed, because the template didn't exist in the first place.
- # But we do want to error out on other unexpected errors.
- if not re.search(r'The resource .* was not found', result.stdout):
- raise Exception('"gcloud compute instance-groups managed delete" returned unexpected error:\n%s' % result.stdout)
- return result
-
-
-def create_instance_group(group_name, template_name, count):
- return run(['gcloud', 'compute', 'instance-groups', 'managed', 'create', group_name, '--zone', LOCATION, '--base-instance-name', group_name, '--template', template_name, '--size', str(count)])
-
-
-def workflow(image_family, params):
- template_name = image_family + '-template'
- group_name = image_family
-
- if delete_instance_group(group_name).returncode == 0:
- print('Deleted existing instance group: %s' % group_name)
- if delete_template(template_name).returncode == 0:
- print('Deleted existing VM template: %s' % template_name)
- create_template(template_name, image_family, params)
- create_instance_group(group_name, template_name, params['count'])
-
-
-def worker():
- while True:
- item = WORK_QUEUE.get()
- if not item:
- break
- try:
- workflow(**item)
- finally:
- WORK_QUEUE.task_done()
-
-
-def main(argv=None):
- if argv is None:
- argv = sys.argv[1:]
-
- # Put VM creation instructions into the work queue.
- for image_family, params in AGENTS.items():
- if argv and image_family not in argv:
- continue
- WORK_QUEUE.put({
- 'image_family': image_family,
- 'params': params
- })
-
- # Spawn worker threads that will create the VMs.
- threads = []
- for _ in range(WORK_QUEUE.qsize()):
- t = threading.Thread(target=worker)
- t.start()
- threads.append(t)
-
- # Wait for all VMs to be created.
- WORK_QUEUE.join()
-
- # Signal worker threads to exit.
- for _ in range(len(threads)):
- WORK_QUEUE.put(None)
-
- # Wait for worker threads to exit.
- for t in threads:
- t.join()
-
- return 0
-
-
-if __name__ == '__main__':
- sys.exit(main())
diff --git a/buildkite/create_instances.py b/buildkite/create_instances.py
new file mode 100755
index 0000000..5085116
--- /dev/null
+++ b/buildkite/create_instances.py
@@ -0,0 +1,240 @@
+#!/usr/bin/env python3
+#
+# Copyright 2018 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import getpass
+import queue
+import re
+import subprocess
+import sys
+import threading
+
+DEBUG = True
+
+LOCATION = 'europe-west1-d'
+
+INSTANCE_GROUPS = {
+ 'buildkite-ubuntu1404': {
+ 'count': 8,
+ 'startup_script': 'startup-ubuntu.sh',
+ 'machine_type': 'n1-standard-32',
+ 'local_ssd': 'interface=nvme',
+ },
+ 'buildkite-ubuntu1604': {
+ 'count': 8,
+ 'startup_script': 'startup-ubuntu.sh',
+ 'machine_type': 'n1-standard-32',
+ 'local_ssd': 'interface=nvme',
+ },
+ 'buildkite-windows': {
+ 'count': 4,
+ 'startup_script': 'startup-windows.ps1',
+ 'machine_type': 'n1-standard-32',
+ 'local_ssd': 'interface=scsi',
+ },
+ 'buildkite-freebsd11': {
+ 'count': 2,
+ 'startup_script': 'startup-ubuntu.sh',
+ 'machine_type': 'n1-standard-32',
+ 'local_ssd': 'interface=scsi',
+ }
+}
+
+SINGLE_INSTANCES = {
+ 'buildkite-pipeline-ubuntu1604': {
+ 'startup_script': 'startup-ubuntu.sh',
+ 'machine_type': 'n1-standard-8',
+ 'persistent_disk': 'buildkite-pipeline-persistent'
+ },
+ '{}-ubuntu1604'.format(getpass.getuser()): {
+ 'image': 'buildkite-ubuntu1604',
+ 'startup_script': 'startup-ubuntu.sh',
+ 'machine_type': 'n1-standard-32',
+ 'local_ssd': 'interface=nvme',
+ },
+ '{}-windows'.format(getpass.getuser()): {
+ 'image': 'buildkite-windows',
+ 'startup_script': 'startup-windows.ps1',
+ 'machine_type': 'n1-standard-32',
+ 'local_ssd': 'interface=scsi',
+ }
+}
+
+PRINT_LOCK = threading.Lock()
+WORK_QUEUE = queue.Queue()
+
+
+def debug(*args, **kwargs):
+ if DEBUG:
+ print(*args, **kwargs)
+
+
+def run(args, **kwargs):
+ debug('Running: {}'.format(' '.join(args)))
+ return subprocess.run(args, **kwargs)
+
+
+def flags_for_instance(image_family, params):
+ cmd = ['--machine-type', params['machine_type']]
+ cmd.extend(['--network', 'buildkite'])
+ if 'windows' in image_family:
+ cmd.extend(['--metadata-from-file', 'windows-startup-script-ps1=' + params['startup_script']])
+ else:
+ cmd.extend(['--metadata-from-file', 'startup-script=' + params['startup_script']])
+ cmd.extend(['--min-cpu-platform', 'Intel Skylake'])
+ cmd.extend(['--boot-disk-type', 'pd-ssd'])
+ cmd.extend(['--boot-disk-size', params.get('boot_disk_size', '50GB')])
+ if 'local_ssd' in params:
+ cmd.extend(['--local-ssd', params['local_ssd']])
+ if 'persistent_disk' in params:
+ cmd.extend(['--disk',
+ 'name={0},device-name={0},mode=rw,boot=no'.format(params['persistent_disk'])])
+ cmd.extend(['--image-project', 'bazel-public'])
+ cmd.extend(['--image-family', image_family])
+ cmd.extend(['--service-account', 'remote-account@bazel-public.iam.gserviceaccount.com'])
+ cmd.extend(['--scopes', 'cloud-platform'])
+ return cmd
+
+
+def delete_instance_template(template_name):
+ cmd = ['gcloud', 'compute', 'instance-templates', 'delete', template_name, '--quiet']
+ result = run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
+ if result.returncode != 0:
+ # It's not an error if 'delete' failed, because the template didn't exist in the first place.
+ # But we do want to error out on other unexpected errors.
+ if not re.search(r'The resource .* was not found', result.stdout):
+ raise Exception('"gcloud compute instance-templates delete" returned unexpected error:\n{}'.format(result.stdout))
+ return result
+
+
+def create_instance_template(template_name, image_family, params):
+ cmd = ['gcloud', 'compute', 'instance-templates', 'create', template_name]
+ cmd.extend(flags_for_instance(image_family, params))
+ run(cmd)
+
+
+def delete_instance(instance_name):
+    return run(['gcloud', 'compute', 'instances', 'delete', '--quiet', '--zone', LOCATION, instance_name])  # same zone as create_instance
+
+
+def create_instance(instance_name, image_family, params):
+    cmd = ['gcloud', 'compute', 'instances', 'create', instance_name]  # the gcloud command group is 'instances' (plural)
+    cmd.extend(['--zone', LOCATION])
+    cmd.extend(flags_for_instance(image_family, params))
+    run(cmd)
+
+
+def delete_instance_group(instance_group_name):
+ cmd = ['gcloud', 'compute', 'instance-groups', 'managed', 'delete', instance_group_name]
+ cmd.extend(['--zone', LOCATION])
+ cmd.extend(['--quiet'])
+ result = run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
+ if result.returncode != 0:
+ # It's not an error if 'delete' failed, because the template didn't exist in the first place.
+ # But we do want to error out on other unexpected errors.
+ if not re.search(r'The resource .* was not found', result.stdout):
+ raise Exception('"gcloud compute instance-groups managed delete" returned unexpected error:\n{}'.format(result.stdout))
+ return result
+
+
+def create_instance_group(instance_group_name, template_name, count):
+ cmd = ['gcloud', 'compute', 'instance-groups', 'managed', 'create', instance_group_name]
+ cmd.extend(['--zone', LOCATION])
+ cmd.extend(['--base-instance-name', instance_group_name])
+ cmd.extend(['--template', template_name])
+ cmd.extend(['--size', str(count)])
+ return run(cmd)
+
+
+def instance_group_task(instance_group_name, params):
+ image_family = params.get('image_family', instance_group_name)
+ template_name = instance_group_name + '-template'
+
+ if delete_instance_group(instance_group_name).returncode == 0:
+ print('Deleted existing instance group: {}'.format(instance_group_name))
+ if delete_instance_template(template_name).returncode == 0:
+ print('Deleted existing VM template: {}'.format(template_name))
+ create_instance_template(template_name, image_family, params)
+ create_instance_group(instance_group_name, template_name, params['count'])
+
+
+def single_instance_task(instance_name, params):
+    image_family = params.get('image_family', params.get('image', instance_name))  # SINGLE_INSTANCES stores the family under 'image'
+
+    if delete_instance(instance_name).returncode == 0:
+        print('Deleted existing instance: {}'.format(instance_name))
+    create_instance(instance_name, image_family, params)
+
+
+def worker():
+ while True:
+ item = WORK_QUEUE.get()
+ if not item:
+ break
+ try:
+ if 'instance_group_name' in item:
+ instance_group_task(**item)
+ elif 'instance_name' in item:
+ single_instance_task(**item)
+ else:
+ raise Exception('Unknown task: {}'.format(item))
+ finally:
+ WORK_QUEUE.task_done()
+
+
+def main(argv=None):
+ if argv is None:
+ argv = sys.argv[1:]
+
+ # Put VM creation instructions into the work queue.
+ for instance_group_name, params in INSTANCE_GROUPS.items():
+ if argv and instance_group_name not in argv:
+ continue
+ WORK_QUEUE.put({
+ 'instance_group_name': instance_group_name,
+ 'params': params
+ })
+
+ for instance_name, params in SINGLE_INSTANCES.items():
+ if argv and instance_name not in argv:
+ continue
+ WORK_QUEUE.put({
+ 'instance_name': instance_name,
+ 'params': params
+ })
+
+ # Spawn worker threads that will create the VMs.
+ threads = []
+ for _ in range(WORK_QUEUE.qsize()):
+ t = threading.Thread(target=worker)
+ t.start()
+ threads.append(t)
+
+ # Wait for all VMs to be created.
+ WORK_QUEUE.join()
+
+ # Signal worker threads to exit.
+ for _ in range(len(threads)):
+ WORK_QUEUE.put(None)
+
+ # Wait for worker threads to exit.
+ for t in threads:
+ t.join()
+
+ return 0
+
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/buildkite/create_worker.py b/buildkite/create_worker.py
deleted file mode 100755
index 1e97ee9..0000000
--- a/buildkite/create_worker.py
+++ /dev/null
@@ -1,151 +0,0 @@
-#!/usr/bin/env python3
-#
-# Copyright 2018 The Bazel Authors. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import getpass
-import queue
-import subprocess
-import sys
-import threading
-
-DEBUG = True
-
-LOCATION = 'europe-west1-d'
-
-MACHINES = {
- 'buildkite-ubuntu1404': {
- 'startup_script': 'startup-ubuntu.sh',
- 'machine_type': 'n1-standard-32',
- 'local_ssd': 'interface=nvme',
- },
- 'buildkite-ubuntu1604': {
- 'startup_script': 'startup-ubuntu.sh',
- 'machine_type': 'n1-standard-32',
- 'local_ssd': 'interface=nvme',
- },
- 'buildkite-windows': {
- 'startup_script': 'startup-windows.ps1',
- 'machine_type': 'n1-standard-32',
- 'local_ssd': 'interface=scsi',
- },
- '%s-ubuntu1604' % getpass.getuser(): {
- 'image': 'buildkite-ubuntu1604',
- 'startup_script': 'startup-ubuntu.sh',
- 'machine_type': 'n1-standard-32',
- 'local_ssd': 'interface=nvme',
- },
- '%s-windows' % getpass.getuser(): {
- 'image': 'buildkite-windows',
- 'startup_script': 'startup-windows.ps1',
- 'machine_type': 'n1-standard-32',
- 'local_ssd': 'interface=scsi',
- }
-}
-
-PRINT_LOCK = threading.Lock()
-WORK_QUEUE = queue.Queue()
-
-
-def debug(*args, **kwargs):
- if DEBUG:
- with PRINT_LOCK:
- print(*args, **kwargs)
-
-
-def run(args, **kwargs):
- debug('Running: %s' % ' '.join(args))
- return subprocess.run(args, **kwargs)
-
-
-def delete_vm(vm):
- return run(['gcloud', 'compute', 'instances', 'delete', '--quiet', vm])
-
-
-def create_vm(vm, idx, params):
- if idx > 0:
- vm = '%s-%s' % (vm, idx)
- image_family = params.get('image', vm)
-
- if delete_vm(vm).returncode == 0:
- with PRINT_LOCK:
- print('Deleted existing VM: %s' % vm)
- cmd = ['gcloud', 'compute', 'instances', 'create', vm]
- cmd.extend(['--zone', LOCATION])
- cmd.extend(['--machine-type', params['machine_type']])
- cmd.extend(['--network', 'buildkite'])
- if 'windows' in image_family:
- cmd.extend(['--metadata-from-file', 'windows-startup-script-ps1=' + params['startup_script']])
- else:
- cmd.extend(['--metadata-from-file', 'startup-script=' + params['startup_script']])
- cmd.extend(['--min-cpu-platform', 'Intel Skylake'])
- cmd.extend(['--boot-disk-type', 'pd-ssd'])
- cmd.extend(['--boot-disk-size', '50GB'])
- if 'local_ssd' in params:
- cmd.extend(['--local-ssd', params['local_ssd']])
- cmd.extend(['--image-project', 'bazel-public'])
- cmd.extend(['--image-family', image_family])
- cmd.extend(['--service-account', 'remote-account@bazel-public.iam.gserviceaccount.com'])
- cmd.extend(['--scopes', 'cloud-platform'])
- run(cmd)
-
-
-def worker():
- while True:
- item = WORK_QUEUE.get()
- if not item:
- break
- try:
- create_vm(**item)
- finally:
- WORK_QUEUE.task_done()
-
-
-def main(argv=None):
- if argv is None:
- argv = sys.argv[1:]
-
- # Put VM creation instructions into the work queue.
- for vm, params in MACHINES.items():
- if argv and vm not in argv:
- continue
- WORK_QUEUE.put({
- 'vm': vm,
- 'idx': 0,
- 'params': params
- })
-
- # Spawn worker threads that will create the VMs.
- threads = []
- for _ in range(WORK_QUEUE.qsize()):
- t = threading.Thread(target=worker)
- t.start()
- threads.append(t)
-
- # Wait for all VMs to be created.
- WORK_QUEUE.join()
-
- # Signal worker threads to exit.
- for _ in range(len(threads)):
- WORK_QUEUE.put(None)
-
- # Wait for worker threads to exit.
- for t in threads:
- t.join()
-
- return 0
-
-
-if __name__ == '__main__':
- sys.exit(main())
diff --git a/buildkite/install-buildkite-agent.sh b/buildkite/install-buildkite-agent.sh
index 07e6f2c..d276bd1 100755
--- a/buildkite/install-buildkite-agent.sh
+++ b/buildkite/install-buildkite-agent.sh
@@ -16,11 +16,14 @@
# Deduce the operating system from the hostname and put it into the metadata.
case $(hostname) in
+ *pipeline*)
+ AGENT_TAGS="osname=pipeline,pipeline=true"
+ ;;
*ubuntu1404*)
- osname="ubuntu1404"
+ AGENT_TAGS="osname=ubuntu1404"
;;
*ubuntu1604*)
- osname="ubuntu1604"
+ AGENT_TAGS="osname=ubuntu1604"
;;
default)
echo "Could not deduce operating system from hostname: $(hostname)!"
@@ -28,12 +31,14 @@
esac
if [[ $(hostname) == *ubuntu* ]]; then
- apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 32A37959C2FA5C3C99EFBC32A79206696452D198 &> /dev/null
+ apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 \
+ --recv-keys 32A37959C2FA5C3C99EFBC32A79206696452D198 &> /dev/null
add-apt-repository -y "deb https://apt.buildkite.com/buildkite-agent unstable main"
apt-get -qqy update > /dev/null
apt-get -qqy install buildkite-agent > /dev/null
fi
+# Add the Buildkite agent hooks.
cat > /etc/buildkite-agent/hooks/environment <<'EOF'
#!/bin/bash
@@ -47,26 +52,41 @@
chmod 0500 /etc/buildkite-agent/hooks/*
chown -R buildkite-agent:buildkite-agent /etc/buildkite-agent
+# Write the Buildkite agent configuration.
cat > /etc/buildkite-agent/buildkite-agent.cfg <<EOF
token="xxx"
name="%hostname"
-tags="os=${osname},pipeline=true"
+tags="${AGENT_TAGS}"
tags-from-gcp=true
build-path="/var/lib/buildkite-agent/builds"
hooks-path="/etc/buildkite-agent/hooks"
plugins-path="/etc/buildkite-agent/plugins"
timestamp-lines=true
+EOF
+if [[ $(hostname) != *pipeline* ]]; then
+ # Stop the agent after each job on stateless worker machines.
+ cat >> /etc/buildkite-agent/buildkite-agent.cfg <<EOF
# Stop the agent (which will automatically be restarted) after each job.
disconnect-after-job=true
disconnect-after-job-timeout=86400
EOF
+fi
chmod 0400 /etc/buildkite-agent/buildkite-agent.cfg
chown -R buildkite-agent:buildkite-agent /etc/buildkite-agent
-# Do not start buildkite-agent automatically. The startup script will start it
-# when necessary.
-if [[ -e /bin/systemctl ]]; then
+# Some notes about our service config:
+#
+# - All Buildkite agents except the pipeline agent are stateless and need a special service config
+# that kills remaining processes and deletes temporary files.
+#
+# - We set the service to not launch automatically, as the startup script will start it once it is
+# done with setting up the local SSD and writing the agent configuration.
+if [[ $(hostname) == *pipeline* ]]; then
+ # This is a pipeline worker machine.
+ systemctl disable buildkite-agent
+elif [[ $(systemctl --version 2>/dev/null) ]]; then
+ # This is a normal worker machine with systemd (e.g. Ubuntu 16.04 LTS).
systemctl disable buildkite-agent
mkdir /etc/systemd/system/buildkite-agent.service.d
cat > /etc/systemd/system/buildkite-agent.service.d/override.conf <<'EOF'
@@ -79,7 +99,8 @@
ExecStopPost=/bin/sh -c 'docker ps -q | xargs -r docker kill'
ExecStopPost=/usr/bin/docker system prune -f --volumes
EOF
-else
+elif [[ $(init --version 2>/dev/null | grep upstart) ]]; then
+ # This is a normal worker machine with upstart (e.g. Ubuntu 14.04 LTS).
cat > /etc/init/buildkite-agent.conf <<'EOF'
description "buildkite-agent"
@@ -103,4 +124,7 @@
docker system prune -f --volumes
end script
EOF
+else
+ echo "Unknown operating system - has neither systemd nor upstart?"
+ exit 1
fi
diff --git a/buildkite/startup-ubuntu.sh b/buildkite/startup-ubuntu.sh
index f6e330b..8f4aa8a 100755
--- a/buildkite/startup-ubuntu.sh
+++ b/buildkite/startup-ubuntu.sh
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-set -eu
+set -euxo pipefail
# Use a local SSD if available, otherwise use a RAM disk for our builds.
# if [ -e /dev/nvme0n1 ]; then
@@ -32,18 +32,20 @@
if [ -e /dev/nvme0n1 ]; then
mkswap -f /dev/nvme0n1
swapon /dev/nvme0n1
+
+ # Move fast and lose data.
+ mount -t tmpfs -o mode=1777,uid=root,gid=root,size=$((100 * 1024 * 1024 * 1024)) tmpfs /tmp
+ mount -t tmpfs -o mode=0711,uid=root,gid=root,size=$((100 * 1024 * 1024 * 1024)) tmpfs /var/lib/docker
+ mount -t tmpfs -o mode=0755,uid=buildkite-agent,gid=buildkite-agent,size=$((100 * 1024 * 1024 * 1024)) tmpfs /var/lib/buildkite-agent
fi
-# Make /tmp a tmpfs.
-mount -t tmpfs -o mode=1777,uid=root,gid=root,size=$((100 * 1024 * 1024 * 1024)) tmpfs /tmp
-mount -t tmpfs -o mode=0711,uid=root,gid=root,size=$((100 * 1024 * 1024 * 1024)) tmpfs /var/lib/docker
-mount -t tmpfs -o mode=0755,uid=buildkite-agent,gid=buildkite-agent,size=$((100 * 1024 * 1024 * 1024)) tmpfs /var/lib/buildkite-agent
-
# Start Docker.
-if [[ -e /bin/systemctl ]]; then
- systemctl start docker
-else
- service docker start
+if [[ $(docker --version 2>/dev/null) ]]; then
+ if [[ $(systemctl --version 2>/dev/null) ]]; then
+ systemctl start docker
+ else
+ service docker start
+ fi
fi
# Get the Buildkite Token from GCS and decrypt it using KMS.