// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

 | syntax = "proto3"; | 
 |  | 
 | package google.cloud.dataproc.v1; | 
 |  | 
 | import "google/api/annotations.proto"; | 
 | import "google/cloud/dataproc/v1/operations.proto"; | 
 | import "google/longrunning/operations.proto"; | 
 | import "google/protobuf/duration.proto"; | 
 | import "google/protobuf/field_mask.proto"; | 
 | import "google/protobuf/timestamp.proto"; | 
 |  | 
 | option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc"; | 
 | option java_multiple_files = true; | 
 | option java_outer_classname = "ClustersProto"; | 
 | option java_package = "com.google.cloud.dataproc.v1"; | 
 |  | 
 |  | 
 | // The ClusterControllerService provides methods to manage clusters | 
 | // of Google Compute Engine instances. | 
 | service ClusterController { | 
 |   // Creates a cluster in a project. | 
 |   rpc CreateCluster(CreateClusterRequest) returns (google.longrunning.Operation) { | 
 |     option (google.api.http) = { post: "/v1/projects/{project_id}/regions/{region}/clusters" body: "cluster" }; | 
 |   } | 
 |  | 
 |   // Updates a cluster in a project. | 
 |   rpc UpdateCluster(UpdateClusterRequest) returns (google.longrunning.Operation) { | 
 |     option (google.api.http) = { patch: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}" body: "cluster" }; | 
 |   } | 
 |  | 
 |   // Deletes a cluster in a project. | 
 |   rpc DeleteCluster(DeleteClusterRequest) returns (google.longrunning.Operation) { | 
 |     option (google.api.http) = { delete: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}" }; | 
 |   } | 
 |  | 
 |   // Gets the resource representation for a cluster in a project. | 
 |   rpc GetCluster(GetClusterRequest) returns (Cluster) { | 
 |     option (google.api.http) = { get: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}" }; | 
 |   } | 
 |  | 
 |   // Lists all regions/{region}/clusters in a project. | 
 |   rpc ListClusters(ListClustersRequest) returns (ListClustersResponse) { | 
 |     option (google.api.http) = { get: "/v1/projects/{project_id}/regions/{region}/clusters" }; | 
 |   } | 
 |  | 
 |   // Gets cluster diagnostic information. | 
 |   // After the operation completes, the Operation.response field | 
 |   // contains `DiagnoseClusterOutputLocation`. | 
 |   rpc DiagnoseCluster(DiagnoseClusterRequest) returns (google.longrunning.Operation) { | 
 |     option (google.api.http) = { post: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}:diagnose" body: "*" }; | 
 |   } | 
 | } | 
 |  | 
 | // Describes the identifying information, config, and status of | 
 | // a cluster of Google Compute Engine instances. | 
 | message Cluster { | 
 |   // [Required] The Google Cloud Platform project ID that the cluster belongs to. | 
 |   string project_id = 1; | 
 |  | 
 |   // [Required] The cluster name. Cluster names within a project must be | 
 |   // unique. Names of deleted clusters can be reused. | 
 |   string cluster_name = 2; | 
 |  | 
 |   // [Required] The cluster config. Note that Cloud Dataproc may set | 
 |   // default values, and values may change when clusters are updated. | 
 |   ClusterConfig config = 3; | 
 |  | 
 |   // [Output-only] Cluster status. | 
 |   ClusterStatus status = 4; | 
 |  | 
 |   // [Output-only] The previous cluster status. | 
 |   repeated ClusterStatus status_history = 7; | 
 |  | 
 |   // [Output-only] A cluster UUID (Unique Universal Identifier). Cloud Dataproc | 
 |   // generates this value when it creates the cluster. | 
 |   string cluster_uuid = 6; | 
 | } | 
 |  | 
 | // The cluster config. | 
 | message ClusterConfig { | 
 |   // [Optional] A Google Cloud Storage staging bucket used for sharing generated | 
 |   // SSH keys and config. If you do not specify a staging bucket, Cloud | 
 |   // Dataproc will determine an appropriate Cloud Storage location (US, | 
 |   // ASIA, or EU) for your cluster's staging bucket according to the Google | 
 |   // Compute Engine zone where your cluster is deployed, and then it will create | 
 |   // and manage this project-level, per-location bucket for you. | 
 |   string config_bucket = 1; | 
 |  | 
 |   // [Required] The shared Google Compute Engine config settings for | 
 |   // all instances in a cluster. | 
 |   GceClusterConfig gce_cluster_config = 8; | 
 |  | 
 |   // [Optional] The Google Compute Engine config settings for | 
 |   // the master instance in a cluster. | 
 |   InstanceGroupConfig master_config = 9; | 
 |  | 
 |   // [Optional] The Google Compute Engine config settings for | 
 |   // worker instances in a cluster. | 
 |   InstanceGroupConfig worker_config = 10; | 
 |  | 
 |   // [Optional] The Google Compute Engine config settings for | 
 |   // additional worker instances in a cluster. | 
 |   InstanceGroupConfig secondary_worker_config = 12; | 
 |  | 
 |   // [Optional] The config settings for software inside the cluster. | 
 |   SoftwareConfig software_config = 13; | 
 |  | 
 |   // [Optional] Commands to execute on each node after config is | 
 |   // completed. By default, executables are run on master and all worker nodes. | 
 |   // You can test a node's <code>role</code> metadata to run an executable on | 
 |   // a master or worker node, as shown below using `curl` (you can also use `wget`): | 
 |   // | 
 |   //     ROLE=$(curl -H Metadata-Flavor:Google http://metadata/computeMetadata/v1/instance/attributes/dataproc-role) | 
 |   //     if [[ "${ROLE}" == 'Master' ]]; then | 
 |   //       ... master specific actions ... | 
 |   //     else | 
 |   //       ... worker specific actions ... | 
 |   //     fi | 
 |   repeated NodeInitializationAction initialization_actions = 11; | 
 | } | 
 |  | 
 | // Common config settings for resources of Google Compute Engine cluster | 
 | // instances, applicable to all instances in the cluster. | 
 | message GceClusterConfig { | 
 |   // [Required] The zone where the Google Compute Engine cluster will be located. | 
 |   // Example: `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]`. | 
 |   string zone_uri = 1; | 
 |  | 
 |   // [Optional] The Google Compute Engine network to be used for machine | 
 |   // communications. Cannot be specified with subnetwork_uri. If neither | 
 |   // `network_uri` nor `subnetwork_uri` is specified, the "default" network of | 
 |   // the project is used, if it exists. Cannot be a "Custom Subnet Network" (see | 
 |   // [Using Subnetworks](/compute/docs/subnetworks) for more information). | 
 |   // Example: `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default`. | 
 |   string network_uri = 2; | 
 |  | 
 |   // [Optional] The Google Compute Engine subnetwork to be used for machine | 
 |   // communications. Cannot be specified with network_uri. | 
 |   // Example: `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/us-east1/sub0`. | 
 |   string subnetwork_uri = 6; | 
 |  | 
 |   // [Optional] If true, all instances in the cluster will only have internal IP | 
 |   // addresses. By default, clusters are not restricted to internal IP addresses, | 
 |   // and will have ephemeral external IP addresses assigned to each instance. | 
 |   // This `internal_ip_only` restriction can only be enabled for subnetwork | 
 |   // enabled networks, and all off-cluster dependencies must be configured to be | 
 |   // accessible without external IP addresses. | 
 |   bool internal_ip_only = 7; | 
 |  | 
 |   // [Optional] The URIs of service account scopes to be included in Google | 
 |   // Compute Engine instances. The following base set of scopes is always | 
 |   // included: | 
 |   // | 
 |   // * https://www.googleapis.com/auth/cloud.useraccounts.readonly | 
 |   // * https://www.googleapis.com/auth/devstorage.read_write | 
 |   // * https://www.googleapis.com/auth/logging.write | 
 |   // | 
 |   // If no scopes are specified, the following defaults are also provided: | 
 |   // | 
 |   // * https://www.googleapis.com/auth/bigquery | 
 |   // * https://www.googleapis.com/auth/bigtable.admin.table | 
 |   // * https://www.googleapis.com/auth/bigtable.data | 
 |   // * https://www.googleapis.com/auth/devstorage.full_control | 
 |   repeated string service_account_scopes = 3; | 
 |  | 
 |   // The Google Compute Engine tags to add to all instances (see | 
 |   // [Labeling instances](/compute/docs/label-or-tag-resources#labeling_instances)). | 
 |   repeated string tags = 4; | 
 |  | 
 |   // The Google Compute Engine metadata entries to add to all instances (see | 
 |   // [Project and instance metadata](https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata)). | 
 |   map<string, string> metadata = 5; | 
 | } | 
 |  | 
 | // [Optional] The config settings for Google Compute Engine resources in | 
 | // an instance group, such as a master or worker group. | 
 | message InstanceGroupConfig { | 
 |   // [Required] The number of VM instances in the instance group. | 
 |   // For master instance groups, must be set to 1. | 
 |   int32 num_instances = 1; | 
 |  | 
 |   // [Optional] The list of instance names. Cloud Dataproc derives the names from | 
 |   // `cluster_name`, `num_instances`, and the instance group if not set by user | 
 |   // (recommended practice is to let Cloud Dataproc derive the name). | 
 |   repeated string instance_names = 2; | 
 |  | 
 |   // [Output-only] The Google Compute Engine image resource used for cluster | 
 |   // instances. Inferred from `SoftwareConfig.image_version`. | 
 |   string image_uri = 3; | 
 |  | 
 |   // [Required] The Google Compute Engine machine type used for cluster instances. | 
 |   // Example: `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`. | 
 |   string machine_type_uri = 4; | 
 |  | 
 |   // [Optional] Disk option config settings. | 
 |   DiskConfig disk_config = 5; | 
 |  | 
 |   // [Optional] Specifies that this instance group contains preemptible instances. | 
 |   bool is_preemptible = 6; | 
 |  | 
 |   // [Output-only] The config for Google Compute Engine Instance Group | 
 |   // Manager that manages this group. | 
 |   // This is only used for preemptible instance groups. | 
 |   ManagedGroupConfig managed_group_config = 7; | 
 | } | 
 |  | 
 | // Specifies the resources used to actively manage an instance group. | 
 | message ManagedGroupConfig { | 
 |   // [Output-only] The name of the Instance Template used for the Managed | 
 |   // Instance Group. | 
 |   string instance_template_name = 1; | 
 |  | 
 |   // [Output-only] The name of the Instance Group Manager for this group. | 
 |   string instance_group_manager_name = 2; | 
 | } | 
 |  | 
 | // Specifies the config of disk options for a group of VM instances. | 
 | message DiskConfig { | 
 |   // [Optional] Size in GB of the boot disk (default is 500GB). | 
 |   int32 boot_disk_size_gb = 1; | 
 |  | 
 |   // [Optional] Number of attached SSDs, from 0 to 4 (default is 0). | 
 |   // If SSDs are not attached, the boot disk is used to store runtime logs and | 
 |   // [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data. | 
 |   // If one or more SSDs are attached, this runtime bulk | 
 |   // data is spread across them, and the boot disk contains only basic | 
 |   // config and installed binaries. | 
 |   int32 num_local_ssds = 2; | 
 | } | 
 |  | 
 | // Specifies an executable to run on a fully configured node and a | 
 | // timeout period for executable completion. | 
 | message NodeInitializationAction { | 
 |   // [Required] Google Cloud Storage URI of executable file. | 
 |   string executable_file = 1; | 
 |  | 
 |   // [Optional] Amount of time executable has to complete. Default is | 
 |   // 10 minutes. Cluster creation fails with an explanatory error message (the | 
 |   // name of the executable that caused the error and the exceeded timeout | 
 |   // period) if the executable is not completed at end of the timeout period. | 
 |   google.protobuf.Duration execution_timeout = 2; | 
 | } | 
 |  | 
 | // The status of a cluster and its instances. | 
 | message ClusterStatus { | 
 |   // The cluster state. | 
 |   enum State { | 
 |     // The cluster state is unknown. | 
 |     UNKNOWN = 0; | 
 |  | 
 |     // The cluster is being created and set up. It is not ready for use. | 
 |     CREATING = 1; | 
 |  | 
 |     // The cluster is currently running and healthy. It is ready for use. | 
 |     RUNNING = 2; | 
 |  | 
 |     // The cluster encountered an error. It is not ready for use. | 
 |     ERROR = 3; | 
 |  | 
 |     // The cluster is being deleted. It cannot be used. | 
 |     DELETING = 4; | 
 |  | 
 |     // The cluster is being updated. It continues to accept and process jobs. | 
 |     UPDATING = 5; | 
 |   } | 
 |  | 
 |   // [Output-only] The cluster's state. | 
 |   State state = 1; | 
 |  | 
 |   // [Output-only] Optional details of cluster's state. | 
 |   string detail = 2; | 
 |  | 
 |   // [Output-only] Time when this state was entered. | 
 |   google.protobuf.Timestamp state_start_time = 3; | 
 | } | 
 |  | 
 | // Specifies the selection and config of software inside the cluster. | 
 | message SoftwareConfig { | 
 |   // [Optional] The version of software inside the cluster. It must match the | 
 |   // regular expression `[0-9]+\.[0-9]+`. If unspecified, it defaults to the | 
 |   // latest version (see [Cloud Dataproc Versioning](/dataproc/versioning)). | 
 |   string image_version = 1; | 
 |  | 
 |   // [Optional] The properties to set on daemon config files. | 
 |   // | 
 |   // Property keys are specified in `prefix:property` format, such as | 
 |   // `core:fs.defaultFS`. The following are supported prefixes | 
 |   // and their mappings: | 
 |   // | 
 |   // * core:   `core-site.xml` | 
 |   // * hdfs:   `hdfs-site.xml` | 
 |   // * mapred: `mapred-site.xml` | 
 |   // * yarn:   `yarn-site.xml` | 
 |   // * hive:   `hive-site.xml` | 
 |   // * pig:    `pig.properties` | 
 |   // * spark:  `spark-defaults.conf` | 
 |   map<string, string> properties = 2; | 
 | } | 
 |  | 
 | // A request to create a cluster. | 
 | message CreateClusterRequest { | 
 |   // [Required] The ID of the Google Cloud Platform project that the cluster | 
 |   // belongs to. | 
 |   string project_id = 1; | 
 |  | 
 |   // [Required] The Cloud Dataproc region in which to handle the request. | 
 |   string region = 3; | 
 |  | 
 |   // [Required] The cluster to create. | 
 |   Cluster cluster = 2; | 
 | } | 
 |  | 
 | // A request to update a cluster. | 
 | message UpdateClusterRequest { | 
 |   // [Required] The ID of the Google Cloud Platform project the | 
 |   // cluster belongs to. | 
 |   string project_id = 1; | 
 |  | 
 |   // [Required] The Cloud Dataproc region in which to handle the request. | 
 |   string region = 5; | 
 |  | 
 |   // [Required] The cluster name. | 
 |   string cluster_name = 2; | 
 |  | 
 |   // [Required] The changes to the cluster. | 
 |   Cluster cluster = 3; | 
 |  | 
 |   // [Required] Specifies the path, relative to <code>Cluster</code>, of | 
 |   // the field to update. For example, to change the number of workers | 
 |   // in a cluster to 5, the <code>update_mask</code> parameter would be | 
 |   // specified as <code>config.worker_config.num_instances</code>, | 
 |   // and the `PATCH` request body would specify the new value, as follows: | 
 |   // | 
 |   //     { | 
 |   //       "config":{ | 
 |   //         "workerConfig":{ | 
 |   //           "numInstances":"5" | 
 |   //         } | 
 |   //       } | 
 |   //     } | 
 |   // Similarly, to change the number of preemptible workers in a cluster to 5, the | 
 |   // <code>update_mask</code> parameter would be <code>config.secondary_worker_config.num_instances</code>, | 
 |   // and the `PATCH` request body would be set as follows: | 
 |   // | 
 |   //     { | 
 |   //       "config":{ | 
 |   //         "secondaryWorkerConfig":{ | 
 |   //           "numInstances":"5" | 
 |   //         } | 
 |   //       } | 
 |   //     } | 
 |   // <strong>Note:</strong> Currently, <code>config.worker_config.num_instances</code> | 
 |   // and <code>config.secondary_worker_config.num_instances</code> are the only | 
 |   // fields that can be updated. | 
 |   google.protobuf.FieldMask update_mask = 4; | 
 | } | 
 |  | 
 | // A request to delete a cluster. | 
 | message DeleteClusterRequest { | 
 |   // [Required] The ID of the Google Cloud Platform project that the cluster | 
 |   // belongs to. | 
 |   string project_id = 1; | 
 |  | 
 |   // [Required] The Cloud Dataproc region in which to handle the request. | 
 |   string region = 3; | 
 |  | 
 |   // [Required] The cluster name. | 
 |   string cluster_name = 2; | 
 | } | 
 |  | 
 | // Request to get the resource representation for a cluster in a project. | 
 | message GetClusterRequest { | 
 |   // [Required] The ID of the Google Cloud Platform project that the cluster | 
 |   // belongs to. | 
 |   string project_id = 1; | 
 |  | 
 |   // [Required] The Cloud Dataproc region in which to handle the request. | 
 |   string region = 3; | 
 |  | 
 |   // [Required] The cluster name. | 
 |   string cluster_name = 2; | 
 | } | 
 |  | 
 | // A request to list the clusters in a project. | 
 | message ListClustersRequest { | 
 |   // [Required] The ID of the Google Cloud Platform project that the cluster | 
 |   // belongs to. | 
 |   string project_id = 1; | 
 |  | 
 |   // [Required] The Cloud Dataproc region in which to handle the request. | 
 |   string region = 4; | 
 |  | 
 |   // [Optional] The standard List page size. | 
 |   int32 page_size = 2; | 
 |  | 
 |   // [Optional] The standard List page token. | 
 |   string page_token = 3; | 
 | } | 
 |  | 
 | // The list of all clusters in a project. | 
 | message ListClustersResponse { | 
 |   // [Output-only] The clusters in the project. | 
 |   repeated Cluster clusters = 1; | 
 |  | 
 |   // [Output-only] This token is included in the response if there are more | 
 |   // results to fetch. To fetch additional results, provide this value as the | 
 |   // `page_token` in a subsequent <code>ListClustersRequest</code>. | 
 |   string next_page_token = 2; | 
 | } | 
 |  | 
 | // A request to collect cluster diagnostic information. | 
 | message DiagnoseClusterRequest { | 
 |   // [Required] The ID of the Google Cloud Platform project that the cluster | 
 |   // belongs to. | 
 |   string project_id = 1; | 
 |  | 
 |   // [Required] The Cloud Dataproc region in which to handle the request. | 
 |   string region = 3; | 
 |  | 
 |   // [Required] The cluster name. | 
 |   string cluster_name = 2; | 
 | } | 
 |  | 
 | // The location of diagnostic output. | 
 | message DiagnoseClusterResults { | 
 |   // [Output-only] The Google Cloud Storage URI of the diagnostic output. | 
 |   // The output report is a plain text file with a summary of collected | 
 |   // diagnostics. | 
 |   string output_uri = 1; | 
 | } |