| // Copyright 2016 Google Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| syntax = "proto3"; |
| |
| package google.genomics.v1alpha2; |
| |
| import "google/api/annotations.proto"; |
| import "google/longrunning/operations.proto"; |
| import "google/protobuf/duration.proto"; |
| import "google/protobuf/empty.proto"; |
| import "google/protobuf/timestamp.proto"; |
| import "google/rpc/code.proto"; |
| |
| option cc_enable_arenas = true; |
| option go_package = "google.golang.org/genproto/googleapis/genomics/v1alpha2;genomics"; |
| option java_multiple_files = true; |
| option java_outer_classname = "PipelinesProto"; |
| option java_package = "com.google.genomics.v1a"; |
| |
| |
| // A service for creating, running, and managing genomics pipelines. |
| service PipelinesV1Alpha2 { |
| // Creates a pipeline that can be run later. Create takes a Pipeline that |
| // has all fields other than `pipelineId` populated, and then returns |
| // the same pipeline with `pipelineId` populated. This id can then be |
| // passed to RunPipeline to run the saved pipeline. |
| // |
| // Caller must have WRITE permission to the project. |
| rpc CreatePipeline(CreatePipelineRequest) returns (Pipeline) { |
| option (google.api.http) = { post: "/v1alpha2/pipelines" body: "pipeline" }; |
| } |
| |
| // Runs a pipeline. If `pipelineId` is specified in the request, then |
| // a saved pipeline is run. If `ephemeralPipeline` is specified, then |
| // that pipeline is run once without saving a copy. |
| // |
| // The caller must have READ permission to the project where the pipeline |
| // is stored and WRITE permission to the project where the pipeline will be |
| // run, as VMs will be created and storage will be used. |
| rpc RunPipeline(RunPipelineRequest) returns (google.longrunning.Operation) { |
| option (google.api.http) = { post: "/v1alpha2/pipelines:run" body: "*" }; |
| } |
| |
| // Retrieves a saved pipeline by its `pipelineId`. |
| // |
| // Caller must have READ permission to the project. |
| rpc GetPipeline(GetPipelineRequest) returns (Pipeline) { |
| option (google.api.http) = { get: "/v1alpha2/pipelines/{pipeline_id}" }; |
| } |
| |
| // Lists the pipelines in a project, optionally filtered by name prefix. |
| // |
| // Caller must have READ permission to the project. |
| rpc ListPipelines(ListPipelinesRequest) returns (ListPipelinesResponse) { |
| option (google.api.http) = { get: "/v1alpha2/pipelines" }; |
| } |
| |
| // Deletes a saved pipeline by its `pipelineId`. |
| // |
| // Caller must have WRITE permission to the project. |
| rpc DeletePipeline(DeletePipelineRequest) returns (google.protobuf.Empty) { |
| option (google.api.http) = { delete: "/v1alpha2/pipelines/{pipeline_id}" }; |
| } |
| |
| // Gets controller configuration information. Should only be called |
| // by VMs created by the Pipelines Service and not by end users. |
| rpc GetControllerConfig(GetControllerConfigRequest) returns (ControllerConfig) { |
| option (google.api.http) = { get: "/v1alpha2/pipelines:getControllerConfig" }; |
| } |
| |
| // Sets the status of a given operation. Any new timestamps (as determined |
| // by description) are appended to TimestampEvents. Should only be called by |
| // VMs created by the Pipelines Service and not by end users. |
| rpc SetOperationStatus(SetOperationStatusRequest) returns (google.protobuf.Empty) { |
| option (google.api.http) = { put: "/v1alpha2/pipelines:setOperationStatus" body: "*" }; |
| } |
| } |
| |
| // Describes the Compute Engine resources (instance and disks) that are being |
| // managed by a running [pipeline][google.genomics.v1alpha2.Pipeline]. |
| message ComputeEngine { |
| // The name of the instance on which the operation is running. |
| string instance_name = 1; |
| |
| // The availability zone in which the instance resides. |
| string zone = 2; |
| |
| // The machine type of the instance. |
| string machine_type = 3; |
| |
| // The names of the disks that were created for this pipeline. |
| repeated string disk_names = 4; |
| } |
| |
| // Runtime metadata that will be populated in the |
| // [runtimeMetadata][google.genomics.v1.OperationMetadata.runtime_metadata] |
| // field of the Operation associated with a RunPipeline execution. |
| message RuntimeMetadata { |
| // Execution information specific to Google Compute Engine: the instance, |
| // zone, machine type, and disks used by the run. |
| ComputeEngine compute_engine = 1; |
| } |
| |
| // The pipeline object. Represents a transformation from a set of input |
| // parameters to a set of output parameters. The transformation is defined |
| // as a docker image and command to run within that image. Each pipeline |
| // is run on a Google Compute Engine VM. A pipeline can be created with the |
| // `create` method and then later run with the `run` method, or a pipeline can |
| // be defined and run all at once with the `run` method. |
| message Pipeline { |
| // Required. The project in which to create the pipeline. The caller must have |
| // WRITE access. |
| string project_id = 1; |
| |
| // Required. A user-specified pipeline name that does not have to be unique. |
| // This name can be used for filtering Pipelines in ListPipelines. |
| string name = 2; |
| |
| // User-specified description. |
| string description = 3; |
| |
| // Input parameters of the pipeline. |
| repeated PipelineParameter input_parameters = 8; |
| |
| // Output parameters of the pipeline. |
| repeated PipelineParameter output_parameters = 9; |
| |
| // Required. The executor indicates in which environment the pipeline runs. |
| // `docker` is currently the only executor defined. |
| oneof executor { |
| // Specifies the docker run information. |
| DockerExecutor docker = 5; |
| } |
| |
| // Required. Specifies resource requirements for the pipeline run. |
| // Required fields: |
| // |
| // * |
| // [minimumCpuCores][google.genomics.v1alpha2.PipelineResources.minimum_cpu_cores] |
| // |
| // * |
| // [minimumRamGb][google.genomics.v1alpha2.PipelineResources.minimum_ram_gb] |
| PipelineResources resources = 6; |
| |
| // Unique pipeline id that is generated by the service when CreatePipeline |
| // is called. Cannot be specified in the Pipeline used in the |
| // CreatePipelineRequest, and will be populated in the response to |
| // CreatePipeline and all subsequent Get and List calls. Indicates that the |
| // service has registered this pipeline. |
| string pipeline_id = 7; |
| } |
| |
| // The request to create a pipeline. The pipeline field here should not have |
| // `pipelineId` populated, as that will be populated by the server. |
| message CreatePipelineRequest { |
| // The pipeline to create. Should not have `pipelineId` populated; the |
| // server assigns it and returns it in the response. |
| Pipeline pipeline = 1; |
| } |
| |
| // The pipeline run arguments. |
| message RunPipelineArgs { |
| // Required. The project in which to run the pipeline. The caller must have |
| // WRITER access to all Google Cloud services and resources (e.g. Google |
| // Compute Engine) that will be used. |
| string project_id = 1; |
| |
| // Pipeline input arguments; keys are defined in the pipeline documentation. |
| // All input parameters that do not have default values must be specified. |
| // If parameters with defaults are specified here, the defaults will be |
| // overridden. |
| map<string, string> inputs = 2; |
| |
| // Pipeline output arguments; keys are defined in the pipeline |
| // documentation. All output parameters without default values |
| // must be specified. If parameters with defaults are specified |
| // here, the defaults will be overridden. |
| map<string, string> outputs = 3; |
| |
| // The Google Cloud Service Account that will be used to access data and |
| // services. By default, the compute service account associated with |
| // `projectId` is used. |
| ServiceAccount service_account = 4; |
| |
| // Deprecated. Use `labels` instead. Client-specified pipeline |
| // operation identifier. |
| string client_id = 5 [deprecated = true]; |
| |
| // Specifies resource requirements/overrides for the pipeline run. |
| PipelineResources resources = 6; |
| |
| // Required. Logging options. Used by the service to communicate results |
| // to the user. |
| LoggingOptions logging = 7; |
| |
| // How long to keep the VM up after a failure (for example docker command |
| // failed, copying input or output files failed, etc). While the VM is up, one |
| // can ssh into the VM to debug. Default is 0; maximum allowed value is 1 day. |
| google.protobuf.Duration keep_vm_alive_on_failure_duration = 8; |
| |
| // Labels to apply to this pipeline run. Labels will also be applied to |
| // compute resources (VM, disks) created by this pipeline run. When listing |
| // operations, operations can be [filtered by labels] |
| // [google.longrunning.ListOperationsRequest.filter]. |
| // Label keys may not be empty; label values may be empty. Non-empty labels |
| // must be 1-63 characters long, and comply with [RFC1035] |
| // (https://www.ietf.org/rfc/rfc1035.txt). |
| // Specifically, the name must be 1-63 characters long and match the regular |
| // expression `[a-z]([-a-z0-9]*[a-z0-9])?` which means the first |
| // character must be a lowercase letter, and all following characters must be |
| // a dash, lowercase letter, or digit, except the last character, which cannot |
| // be a dash. |
| map<string, string> labels = 9; |
| } |
| |
| // The request to run a pipeline. If `pipelineId` is specified, it |
| // refers to a saved pipeline created with CreatePipeline and set as |
| // the `pipelineId` of the returned Pipeline object. If |
| // `ephemeralPipeline` is specified, that pipeline is run once |
| // with the given args and not saved. It is an error to specify both |
| // `pipelineId` and `ephemeralPipeline`. `pipelineArgs` |
| // must be specified. |
| message RunPipelineRequest { |
| // The pipeline to run: either a saved pipeline or an ephemeral one, |
| // never both. |
| oneof pipeline { |
| // The already created pipeline to run. |
| string pipeline_id = 1; |
| |
| // A new pipeline object to run once and then delete. |
| Pipeline ephemeral_pipeline = 2; |
| } |
| |
| // Required. The arguments to use when running this pipeline. |
| RunPipelineArgs pipeline_args = 3; |
| } |
| |
| // A request to get a saved pipeline by id. |
| message GetPipelineRequest { |
| // The id of the pipeline to retrieve. Caller must have READ access to the |
| // project in which this pipeline is defined. |
| string pipeline_id = 1; |
| } |
| |
| // A request to list pipelines in a given project. Pipelines can be |
| // filtered by name using `namePrefix`: all pipelines with names that |
| // begin with `namePrefix` will be returned. Uses standard pagination: |
| // `pageSize` indicates how many pipelines to return, and |
| // `pageToken` comes from a previous ListPipelinesResponse to |
| // indicate offset. |
| message ListPipelinesRequest { |
| // Required. The name of the project to search for pipelines. Caller |
| // must have READ access to this project. |
| string project_id = 1; |
| |
| // Only pipelines with names that begin with this prefix are |
| // returned. If unspecified, all pipelines in the project, up to |
| // `pageSize`, will be returned. |
| string name_prefix = 2; |
| |
| // Number of pipelines to return at once. Defaults to 256, and max |
| // is 2048. |
| int32 page_size = 3; |
| |
| // Token to use to indicate where to start getting results; taken from the |
| // `nextPageToken` of a previous ListPipelinesResponse. |
| // If unspecified, returns the first page of results. |
| string page_token = 4; |
| } |
| |
| // The response of ListPipelines. Contains at most `pageSize` |
| // pipelines. If it contains `pageSize` pipelines, and more pipelines |
| // exist, then `nextPageToken` will be populated and should be |
| // used as the `pageToken` argument to a subsequent ListPipelines |
| // request. |
| message ListPipelinesResponse { |
| // The matched pipelines. |
| repeated Pipeline pipelines = 1; |
| |
| // The token to pass as `pageToken` in a subsequent ListPipelines request to |
| // get the next page of results. |
| string next_page_token = 2; |
| } |
| |
| // The request to delete a saved pipeline by ID. |
| message DeletePipelineRequest { |
| // The id of the pipeline to delete. Caller must have WRITE access to the |
| // project in which this pipeline is defined. |
| string pipeline_id = 1; |
| } |
| |
| // Request to get controller configuration. Should only be used |
| // by VMs created by the Pipelines Service and not by end users. |
| message GetControllerConfigRequest { |
| // The operation to retrieve controller configuration for. |
| string operation_id = 1; |
| |
| // NOTE(review): not documented in this file. Presumably a token the service |
| // uses to validate that the caller is a VM it created for this operation; |
| // confirm with the service owners. |
| uint64 validation_token = 2; |
| } |
| |
| // Stores the information that the controller will fetch from the |
| // server in order to run. Should only be used by VMs created by the |
| // Pipelines Service and not by end users. |
| // |
| // NOTE(review): none of the fields below were documented in this file. The |
| // descriptions are inferred from field names and types and are marked as |
| // assumptions; confirm them with the service owners. |
| message ControllerConfig { |
| // A list of strings, wrapped in a message so that it can be used as the |
| // value type of a map field. |
| message RepeatedString { |
| // The wrapped string values. |
| repeated string values = 1; |
| } |
| |
| // Presumably the docker image to run (compare `DockerExecutor.image_name`); |
| // TODO confirm. |
| string image = 1; |
| |
| // Presumably the command to run in the image (compare |
| // `DockerExecutor.cmd`); TODO confirm. |
| string cmd = 2; |
| |
| // Presumably the Google Cloud Storage path that logs are written to (compare |
| // `LoggingOptions.gcs_path`); TODO confirm. |
| string gcs_log_path = 3; |
| |
| // Presumably the Compute Engine machine type of the VM; TODO confirm. |
| string machine_type = 4; |
| |
| // String-to-string map. Presumably variable names mapped to their values; |
| // TODO confirm. |
| map<string, string> vars = 5; |
| |
| // String-to-string map. Presumably keyed by disk name; the meaning of the |
| // values is not shown here. TODO confirm. |
| map<string, string> disks = 6; |
| |
| // Map from a string key to a list of strings. Presumably the Google Cloud |
| // Storage locations to copy inputs from; TODO confirm. |
| map<string, RepeatedString> gcs_sources = 7; |
| |
| // Map from a string key to a list of strings. Presumably the Google Cloud |
| // Storage locations to copy outputs to; TODO confirm. |
| map<string, RepeatedString> gcs_sinks = 8; |
| } |
| |
| // Records a major event in job execution together with the time at which |
| // it occurred. |
| message TimestampEvent { |
| // String indicating the type of event. |
| string description = 1; |
| |
| // The time this event occurred. |
| google.protobuf.Timestamp timestamp = 2; |
| } |
| |
| // Request to set operation status. Should only be used by VMs |
| // created by the Pipelines Service and not by end users. |
| message SetOperationStatusRequest { |
| // The operation whose status is being set. |
| string operation_id = 1; |
| |
| // Events to record for the operation. Per the SetOperationStatus RPC, any |
| // new timestamps (as determined by description) are appended to the |
| // operation's TimestampEvents. |
| repeated TimestampEvent timestamp_events = 2; |
| |
| // NOTE(review): not documented in this file. Presumably the canonical error |
| // code describing the operation's outcome; confirm. |
| google.rpc.Code error_code = 3; |
| |
| // NOTE(review): not documented in this file. Presumably a human-readable |
| // message accompanying `error_code`; confirm. |
| string error_message = 4; |
| |
| // NOTE(review): not documented in this file. Presumably a token the service |
| // uses to validate that the caller is a VM it created for this operation; |
| // confirm with the service owners. |
| uint64 validation_token = 5; |
| } |
| |
| // A Google Cloud Service Account. |
| message ServiceAccount { |
| // Email address of the service account. Defaults to `default`, |
| // which uses the compute service account associated with the project. |
| string email = 1; |
| |
| // List of scopes to be enabled for this service account on the VM. |
| // The following scopes are always included automatically and do not need to |
| // be listed here: |
| // |
| // * https://www.googleapis.com/auth/compute |
| // * https://www.googleapis.com/auth/devstorage.full_control |
| // * https://www.googleapis.com/auth/genomics |
| // * https://www.googleapis.com/auth/logging.write |
| // * https://www.googleapis.com/auth/monitoring.write |
| repeated string scopes = 2; |
| } |
| |
| // The logging options for the pipeline run. |
| message LoggingOptions { |
| // The location in Google Cloud Storage to which the pipeline logs |
| // will be copied. Can be specified in one of two ways: |
| // |
| // * As a fully qualified directory path, in which case logs will be output |
| // with a unique identifier as the filename in that directory. |
| // * As a fully specified path, which must end in `.log`, in which case that |
| // path will be used, and the user must ensure that logs are not |
| // overwritten. |
| // |
| // Stdout and stderr logs from the run are also |
| // generated and output as `-stdout.log` and `-stderr.log`. |
| string gcs_path = 1; |
| } |
| |
| // The system resources for the pipeline run. |
| message PipelineResources { |
| // A Google Compute Engine disk resource specification. |
| message Disk { |
| // The types of disks that may be attached to VMs. |
| enum Type { |
| // Default disk type. Use one of the other options below. |
| TYPE_UNSPECIFIED = 0; |
| |
| // Specifies a Google Compute Engine persistent hard disk. See |
| // https://cloud.google.com/compute/docs/disks/#pdspecs for details. |
| PERSISTENT_HDD = 1; |
| |
| // Specifies a Google Compute Engine persistent solid-state disk. See |
| // https://cloud.google.com/compute/docs/disks/#pdspecs for details. |
| PERSISTENT_SSD = 2; |
| |
| // Specifies a Google Compute Engine local SSD. |
| // See https://cloud.google.com/compute/docs/disks/local-ssd for details. |
| LOCAL_SSD = 3; |
| } |
| |
| // Required. The name of the disk that can be used in the pipeline |
| // parameters. Must be 1 - 63 characters. |
| // The name "boot" is reserved for system use. |
| string name = 1; |
| |
| // Required. The type of the disk to create. |
| Type type = 2; |
| |
| // The size of the disk, in GB. Defaults to 500 (GB). |
| // This field is not applicable for local SSD. |
| int32 size_gb = 3; |
| |
| // The full or partial URL of the persistent disk to attach. See |
| // https://cloud.google.com/compute/docs/reference/latest/instances#resource |
| // and |
| // https://cloud.google.com/compute/docs/disks/persistent-disks#snapshots |
| // for more details. |
| string source = 4; |
| |
| // Deprecated. Disks created by the Pipelines API will be deleted at the end |
| // of the pipeline run, regardless of what this field is set to. |
| bool auto_delete = 6 [deprecated = true]; |
| |
| // Required at create time and cannot be overridden at run time. |
| // Specifies the path in the docker container where files on |
| // this disk should be located. For example, if `mountPoint` |
| // is `/mnt/disk`, and the parameter has `localPath` |
| // `inputs/file.txt`, the docker container can access the data at |
| // `/mnt/disk/inputs/file.txt`. |
| string mount_point = 8; |
| } |
| |
| // The minimum number of cores to use. Defaults to 1. |
| int32 minimum_cpu_cores = 1; |
| |
| // Whether to use preemptible VMs. Defaults to `false`. In order to use this, |
| // must be true for both create time and run time. Cannot be true at run time |
| // if false at create time. |
| bool preemptible = 2; |
| |
| // The minimum amount of RAM to use, in GB. Defaults to 3.75 (GB). |
| double minimum_ram_gb = 3; |
| |
| // Disks to attach. |
| repeated Disk disks = 4; |
| |
| // List of Google Compute Engine availability zones to which resource |
| // creation will be restricted. If empty, any zone may be chosen. |
| repeated string zones = 5; |
| |
| // The size of the boot disk, in GB. Defaults to 10 (GB). |
| int32 boot_disk_size_gb = 6; |
| |
| // Whether to create the instance without an external IP address, so that it |
| // has only a private IP. This is an experimental feature that may go away. |
| // Defaults to false. |
| // Corresponds to `--no_address` flag for [gcloud compute instances create] |
| // (https://cloud.google.com/sdk/gcloud/reference/compute/instances/create). |
| // In order to use this, must be true for both create time and run time. |
| // Cannot be true at run time if false at create time. If you need to ssh into |
| // a private IP VM for debugging, you can ssh to a public VM and then ssh into |
| // the private VM's Internal IP. If noAddress is set, this pipeline run may |
| // only load docker images from Google Container Registry and not Docker Hub. |
| // ** Note: To use this option, your project must be in Google Access for |
| // Private IPs Early Access Program.** |
| bool no_address = 7; |
| } |
| |
| // Parameters facilitate setting and delivering data into the |
| // pipeline's execution environment. They are defined at create time, |
| // with optional defaults, and can be overridden at run time. |
| // |
| // If `localCopy` is unset, then the parameter specifies a string that |
| // is passed as-is into the pipeline, as the value of the environment |
| // variable with the given name. A default value can be optionally |
| // specified at create time. The default can be overridden at run time |
| // using the inputs map. If no default is given, a value must be |
| // supplied at runtime. |
| // |
| // If `localCopy` is defined, then the parameter specifies a data |
| // source or sink, both in Google Cloud Storage and on the Docker container |
| // where the pipeline computation is run. The [service account associated with |
| // the Pipeline][google.genomics.v1alpha2.RunPipelineArgs.service_account] (by |
| // default the project's Compute Engine service account) must have access to the |
| // Google Cloud Storage paths. |
| // |
| // At run time, the Google Cloud Storage paths can be overridden if a default |
| // was provided at create time, or must be set otherwise. The pipeline runner |
| // should add a key/value pair to either the inputs or outputs map. The |
| // indicated data copies will be carried out before/after pipeline execution, |
| // just as if the corresponding arguments were provided to `gsutil cp`. |
| // |
| // For example: Given the following `PipelineParameter`, specified |
| // in the `inputParameters` list: |
| // |
| // ``` |
| // {name: "input_file", localCopy: {path: "file.txt", disk: "pd1"}} |
| // ``` |
| // |
| // where `disk` is defined in the `PipelineResources` object as: |
| // |
| // ``` |
| // {name: "pd1", mountPoint: "/mnt/disk/"} |
| // ``` |
| // |
| // We create a disk named `pd1`, mount it on the host VM, and map |
| // `/mnt/pd1` to `/mnt/disk` in the docker container. At |
| // runtime, an entry for `input_file` would be required in the inputs |
| // map, such as: |
| // |
| // ``` |
| // inputs["input_file"] = "gs://my-bucket/bar.txt" |
| // ``` |
| // |
| // This would generate the following gsutil call: |
| // |
| // ``` |
| // gsutil cp gs://my-bucket/bar.txt /mnt/pd1/file.txt |
| // ``` |
| // |
| // The file `/mnt/pd1/file.txt` maps to `/mnt/disk/file.txt` in the |
| // Docker container. Acceptable paths are: |
| // |
| // <table> |
| // <thead> |
| // <tr><th>Google Cloud Storage path</th><th>Local path</th></tr> |
| // </thead> |
| // <tbody> |
| // <tr><td>file</td><td>file</td></tr> |
| // <tr><td>glob</td><td>directory</td></tr> |
| // </tbody> |
| // </table> |
| // |
| // For outputs, the direction of the copy is reversed: |
| // |
| // ``` |
| // gsutil cp /mnt/disk/file.txt gs://my-bucket/bar.txt |
| // ``` |
| // |
| // Acceptable paths are: |
| // |
| // <table> |
| // <thead> |
| // <tr><th>Local path</th><th>Google Cloud Storage path</th></tr> |
| // </thead> |
| // <tbody> |
| // <tr><td>file</td><td>file</td></tr> |
| // <tr> |
| // <td>file</td> |
| // <td>directory - directory must already exist</td> |
| // </tr> |
| // <tr> |
| // <td>glob</td> |
| // <td>directory - directory will be created if it doesn't exist</td> |
| // </tr> |
| // </tbody> |
| // </table> |
| // |
| // One restriction, due to Docker limitations, is that for outputs that are |
| // found on the boot disk, the local path cannot be a glob and must be a file. |
| message PipelineParameter { |
| // LocalCopy defines how a remote file should be copied to and from the VM. |
| message LocalCopy { |
| // Required. The path within the user's docker container where |
| // this input should be localized to and from, relative to the specified |
| // disk's mount point. For example: `file.txt`. |
| string path = 1; |
| |
| // Required. The name of the disk where this parameter is |
| // located. Can be the name of one of the disks specified in the |
| // Resources field, or "boot", which represents the Docker |
| // instance's boot disk and has a mount point of `/`. |
| string disk = 2; |
| } |
| |
| // Required. Name of the parameter - the pipeline runner uses this string |
| // as the key to the input and output maps in RunPipeline. |
| string name = 1; |
| |
| // Human-readable description. |
| string description = 2; |
| |
| // The default value for this parameter. Can be overridden at runtime. |
| // If `localCopy` is present, then this must be a Google Cloud Storage path |
| // beginning with `gs://`. |
| string default_value = 5; |
| |
| // If present, this parameter is marked for copying to and from the VM. |
| // `LocalCopy` indicates where on the VM the file should be. The value |
| // given to this parameter (either at runtime or using `defaultValue`) |
| // must be the remote path where the file should be. |
| LocalCopy local_copy = 6; |
| } |
| |
| // The Docker executor specification. |
| message DockerExecutor { |
| // Required. Image name from either Docker Hub or Google Container Registry. |
| // Users that run pipelines must have READ access to the image. |
| string image_name = 1; |
| |
| // Required. The command or newline-delimited script to run. The command |
| // string will be executed within a bash shell. |
| // |
| // If the command exits with a non-zero exit code, output parameter |
| // de-localization will be skipped and the pipeline operation's |
| // [`error`][google.longrunning.Operation.error] field will be populated. |
| // |
| // Maximum command string length is 16384. |
| string cmd = 2; |
| } |