// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataproc.v1;

import "google/api/annotations.proto";
import "google/cloud/dataproc/v1/operations.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc";
option java_multiple_files = true;
option java_outer_classname = "ClustersProto";
option java_package = "com.google.cloud.dataproc.v1";

// The ClusterControllerService provides methods to manage clusters
// of Google Compute Engine instances.
service ClusterController {
  // Creates a cluster in a project.
  rpc CreateCluster(CreateClusterRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = { post: "/v1/projects/{project_id}/regions/{region}/clusters" body: "cluster" };
  }
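
  // A minimal sketch of calling CreateCluster, assuming the
  // `google-cloud-dataproc` Python client (`dataproc_v1`); the project ID,
  // region, cluster name, and machine types below are placeholders:
  //
  //     from google.cloud import dataproc_v1
  //
  //     # Dataproc is regional: point the client at the region's endpoint.
  //     client = dataproc_v1.ClusterControllerClient(
  //         client_options={"api_endpoint": "us-central1-dataproc.googleapis.com:443"}
  //     )
  //     operation = client.create_cluster(
  //         request={
  //             "project_id": "my-project",
  //             "region": "us-central1",
  //             "cluster": {
  //                 "project_id": "my-project",
  //                 "cluster_name": "example-cluster",
  //                 "config": {
  //                     "master_config": {"num_instances": 1, "machine_type_uri": "n1-standard-2"},
  //                     "worker_config": {"num_instances": 2, "machine_type_uri": "n1-standard-2"},
  //                 },
  //             },
  //         }
  //     )
  //     cluster = operation.result()  # blocks until the long-running operation completes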

  // Updates a cluster in a project.
  rpc UpdateCluster(UpdateClusterRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = { patch: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}" body: "cluster" };
  }

  // Deletes a cluster in a project.
  rpc DeleteCluster(DeleteClusterRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = { delete: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}" };
  }

  // Gets the resource representation for a cluster in a project.
  rpc GetCluster(GetClusterRequest) returns (Cluster) {
    option (google.api.http) = { get: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}" };
  }

  // Lists all clusters in a project and region.
  rpc ListClusters(ListClustersRequest) returns (ListClustersResponse) {
    option (google.api.http) = { get: "/v1/projects/{project_id}/regions/{region}/clusters" };
  }
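
  // A sketch of listing clusters with the assumed Python client (`client` as
  // constructed above); the returned pager follows `page_token` automatically:
  //
  //     for cluster in client.list_clusters(
  //         request={"project_id": "my-project", "region": "us-central1"}
  //     ):
  //         print(cluster.cluster_name, cluster.status.state)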

  // Gets cluster diagnostic information.
  // After the operation completes, the Operation.response field
  // contains `DiagnoseClusterResults`.
  rpc DiagnoseCluster(DiagnoseClusterRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = { post: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}:diagnose" body: "*" };
  }
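
  // A sketch of diagnosing a cluster with the assumed Python client; per the
  // comment above, the finished operation's response carries the output
  // location (placeholder project, region, and cluster name):
  //
  //     operation = client.diagnose_cluster(
  //         request={
  //             "project_id": "my-project",
  //             "region": "us-central1",
  //             "cluster_name": "example-cluster",
  //         }
  //     )
  //     results = operation.result()
  //     print(results.output_uri)  # Cloud Storage URI of the diagnostic report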
}

// Describes the identifying information, config, and status of
// a cluster of Google Compute Engine instances.
message Cluster {
  // [Required] The Google Cloud Platform project ID that the cluster belongs to.
  string project_id = 1;

  // [Required] The cluster name. Cluster names within a project must be
  // unique. Names of deleted clusters can be reused.
  string cluster_name = 2;

  // [Required] The cluster config. Note that Cloud Dataproc may set
  // default values, and values may change when clusters are updated.
  ClusterConfig config = 3;

  // [Output-only] Cluster status.
  ClusterStatus status = 4;

  // [Output-only] The previous cluster status.
  repeated ClusterStatus status_history = 7;

  // [Output-only] A cluster UUID (Universally Unique Identifier). Cloud Dataproc
  // generates this value when it creates the cluster.
  string cluster_uuid = 6;
}

// The cluster config.
message ClusterConfig {
  // [Optional] A Google Cloud Storage staging bucket used for sharing generated
  // SSH keys and config. If you do not specify a staging bucket, Cloud
  // Dataproc will determine an appropriate Cloud Storage location (US,
  // ASIA, or EU) for your cluster's staging bucket according to the Google
  // Compute Engine zone where your cluster is deployed, and then it will create
  // and manage this project-level, per-location bucket for you.
  string config_bucket = 1;

  // [Required] The shared Google Compute Engine config settings for
  // all instances in a cluster.
  GceClusterConfig gce_cluster_config = 8;

  // [Optional] The Google Compute Engine config settings for
  // the master instance in a cluster.
  InstanceGroupConfig master_config = 9;

  // [Optional] The Google Compute Engine config settings for
  // worker instances in a cluster.
  InstanceGroupConfig worker_config = 10;

  // [Optional] The Google Compute Engine config settings for
  // additional worker instances in a cluster.
  InstanceGroupConfig secondary_worker_config = 12;

  // [Optional] The config settings for software inside the cluster.
  SoftwareConfig software_config = 13;

  // [Optional] Commands to execute on each node after config is
  // completed. By default, executables are run on master and all worker nodes.
  // You can test a node's <code>role</code> metadata to run an executable on
  // a master or worker node, as shown below using `curl` (you can also use `wget`):
  //
  //     ROLE=$(curl -H Metadata-Flavor:Google http://metadata/computeMetadata/v1/instance/attributes/dataproc-role)
  //     if [[ "${ROLE}" == 'Master' ]]; then
  //       ... master specific actions ...
  //     else
  //       ... worker specific actions ...
  //     fi
  repeated NodeInitializationAction initialization_actions = 11;
}
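
// A sketch of assembling a `ClusterConfig` as a Python dict for the assumed
// `google-cloud-dataproc` client shown earlier; the bucket and script names
// are placeholders:
//
//     config = {
//         "config_bucket": "my-staging-bucket",  # optional; Cloud Dataproc manages one if omitted
//         "gce_cluster_config": {
//             "zone_uri": "https://www.googleapis.com/compute/v1/projects/my-project/zones/us-central1-a",
//         },
//         "initialization_actions": [
//             {"executable_file": "gs://my-bucket/startup.sh"},
//         ],
//     }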

// Common config settings for resources of Google Compute Engine cluster
// instances, applicable to all instances in the cluster.
message GceClusterConfig {
  // [Required] The zone where the Google Compute Engine cluster will be located.
  // Example: `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]`.
  string zone_uri = 1;

  // [Optional] The Google Compute Engine network to be used for machine
  // communications. Cannot be specified with `subnetwork_uri`. If neither
  // `network_uri` nor `subnetwork_uri` is specified, the "default" network of
  // the project is used, if it exists. Cannot be a "Custom Subnet Network" (see
  // [Using Subnetworks](/compute/docs/subnetworks) for more information).
  // Example: `https://www.googleapis.com/compute/v1/projects/[project_id]/global/networks/default`.
  string network_uri = 2;

  // [Optional] The Google Compute Engine subnetwork to be used for machine
  // communications. Cannot be specified with `network_uri`.
  // Example: `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/us-east1/subnetworks/sub0`.
  string subnetwork_uri = 6;

  // [Optional] If true, all instances in the cluster will have only internal IP
  // addresses. By default, clusters are not restricted to internal IP addresses,
  // and will have ephemeral external IP addresses assigned to each instance.
  // This `internal_ip_only` restriction can only be enabled for subnetwork-enabled
  // networks, and all off-cluster dependencies must be configured to be
  // accessible without external IP addresses.
  bool internal_ip_only = 7;

  // [Optional] The URIs of service account scopes to be included in Google
  // Compute Engine instances. The following base set of scopes is always
  // included:
  //
  // * https://www.googleapis.com/auth/cloud.useraccounts.readonly
  // * https://www.googleapis.com/auth/devstorage.read_write
  // * https://www.googleapis.com/auth/logging.write
  //
  // If no scopes are specified, the following defaults are also provided:
  //
  // * https://www.googleapis.com/auth/bigquery
  // * https://www.googleapis.com/auth/bigtable.admin.table
  // * https://www.googleapis.com/auth/bigtable.data
  // * https://www.googleapis.com/auth/devstorage.full_control
  repeated string service_account_scopes = 3;

  // The Google Compute Engine tags to add to all instances (see
  // [Labeling instances](/compute/docs/label-or-tag-resources#labeling_instances)).
  repeated string tags = 4;

  // The Google Compute Engine metadata entries to add to all instances (see
  // [Project and instance metadata](https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata)).
  map<string, string> metadata = 5;
}
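
// A sketch of a `GceClusterConfig` dict illustrating that `network_uri` and
// `subnetwork_uri` are mutually exclusive, and that `internal_ip_only`
// requires a subnetwork; all resource names are placeholders:
//
//     gce_cluster_config = {
//         "zone_uri": "https://www.googleapis.com/compute/v1/projects/my-project/zones/us-east1-b",
//         "subnetwork_uri": "https://www.googleapis.com/compute/v1/projects/my-project/regions/us-east1/subnetworks/sub0",
//         "internal_ip_only": True,  # instances get no external IP addresses
//         "tags": ["dataproc"],
//         "metadata": {"custom-key": "custom-value"},
//     }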

// The config settings for Google Compute Engine resources in
// an instance group, such as a master or worker group.
message InstanceGroupConfig {
  // [Required] The number of VM instances in the instance group.
  // For master instance groups, must be set to 1.
  int32 num_instances = 1;

  // [Optional] The list of instance names. If not set by the user, Cloud
  // Dataproc derives the names from `cluster_name`, `num_instances`, and the
  // instance group (the recommended practice is to let Cloud Dataproc derive
  // the names).
  repeated string instance_names = 2;

  // [Output-only] The Google Compute Engine image resource used for cluster
  // instances. Inferred from `SoftwareConfig.image_version`.
  string image_uri = 3;

  // [Required] The Google Compute Engine machine type used for cluster instances.
  // Example: `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`.
  string machine_type_uri = 4;

  // [Optional] Disk option config settings.
  DiskConfig disk_config = 5;

  // [Optional] Specifies that this instance group contains preemptible instances.
  bool is_preemptible = 6;

  // [Output-only] The config for Google Compute Engine Instance Group
  // Manager that manages this group.
  // This is only used for preemptible instance groups.
  ManagedGroupConfig managed_group_config = 7;
}

// Specifies the resources used to actively manage an instance group.
message ManagedGroupConfig {
  // [Output-only] The name of the Instance Template used for the Managed
  // Instance Group.
  string instance_template_name = 1;

  // [Output-only] The name of the Instance Group Manager for this group.
  string instance_group_manager_name = 2;
}

// Specifies the config of disk options for a group of VM instances.
message DiskConfig {
  // [Optional] Size in GB of the boot disk (default is 500 GB).
  int32 boot_disk_size_gb = 1;

  // [Optional] Number of attached SSDs, from 0 to 4 (default is 0).
  // If SSDs are not attached, the boot disk is used to store runtime logs and
  // [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data.
  // If one or more SSDs are attached, this runtime bulk
  // data is spread across them, and the boot disk contains only basic
  // config and installed binaries.
  int32 num_local_ssds = 2;
}

// Specifies an executable to run on a fully configured node and a
// timeout period for executable completion.
message NodeInitializationAction {
  // [Required] Google Cloud Storage URI of the executable file.
  string executable_file = 1;

  // [Optional] Amount of time the executable has to complete. Default is
  // 10 minutes. Cluster creation fails with an explanatory error message (the
  // name of the executable that caused the error and the exceeded timeout
  // period) if the executable has not completed by the end of the timeout
  // period.
  google.protobuf.Duration execution_timeout = 2;
}
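
// A sketch of a `NodeInitializationAction` as a Python dict; the Duration is
// written in the `{"seconds": ...}` form accepted by the assumed client, and
// the script URI is a placeholder:
//
//     action = {
//         "executable_file": "gs://my-bucket/startup.sh",
//         "execution_timeout": {"seconds": 600},  # 10 minutes, matching the default
//     }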

// The status of a cluster and its instances.
message ClusterStatus {
  // The cluster state.
  enum State {
    // The cluster state is unknown.
    UNKNOWN = 0;

    // The cluster is being created and set up. It is not ready for use.
    CREATING = 1;

    // The cluster is currently running and healthy. It is ready for use.
    RUNNING = 2;

    // The cluster encountered an error. It is not ready for use.
    ERROR = 3;

    // The cluster is being deleted. It cannot be used.
    DELETING = 4;

    // The cluster is being updated. It continues to accept and process jobs.
    UPDATING = 5;
  }

  // [Output-only] The cluster's state.
  State state = 1;

  // [Output-only] Optional details of cluster's state.
  string detail = 2;

  // [Output-only] Time when this state was entered.
  google.protobuf.Timestamp state_start_time = 3;
}

// Specifies the selection and config of software inside the cluster.
message SoftwareConfig {
  // [Optional] The version of software inside the cluster. It must match the
  // regular expression `[0-9]+\.[0-9]+`. If unspecified, it defaults to the
  // latest version (see [Cloud Dataproc Versioning](/dataproc/versioning)).
  string image_version = 1;

  // [Optional] The properties to set on daemon config files.
  //
  // Property keys are specified in `prefix:property` format, such as
  // `core:fs.defaultFS`. The following are supported prefixes
  // and their mappings:
  //
  // * core: `core-site.xml`
  // * hdfs: `hdfs-site.xml`
  // * mapred: `mapred-site.xml`
  // * yarn: `yarn-site.xml`
  // * hive: `hive-site.xml`
  // * pig: `pig.properties`
  // * spark: `spark-defaults.conf`
  map<string, string> properties = 2;
}
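
// A sketch of a `SoftwareConfig` as a Python dict using the `prefix:property`
// key format described above (values are illustrative placeholders):
//
//     software_config = {
//         "image_version": "1.0",
//         "properties": {
//             "core:fs.defaultFS": "hdfs://localhost:8020",  # lands in core-site.xml
//             "spark:spark.executor.memory": "4g",           # lands in spark-defaults.conf
//         },
//     }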

// A request to create a cluster.
message CreateClusterRequest {
  // [Required] The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1;

  // [Required] The Cloud Dataproc region in which to handle the request.
  string region = 3;

  // [Required] The cluster to create.
  Cluster cluster = 2;
}

// A request to update a cluster.
message UpdateClusterRequest {
  // [Required] The ID of the Google Cloud Platform project the
  // cluster belongs to.
  string project_id = 1;

  // [Required] The Cloud Dataproc region in which to handle the request.
  string region = 5;

  // [Required] The cluster name.
  string cluster_name = 2;

  // [Required] The changes to the cluster.
  Cluster cluster = 3;

  // [Required] Specifies the path, relative to <code>Cluster</code>, of
  // the field to update. For example, to change the number of workers
  // in a cluster to 5, the <code>update_mask</code> parameter would be
  // specified as <code>config.worker_config.num_instances</code>,
  // and the `PATCH` request body would specify the new value, as follows:
  //
  //     {
  //       "config":{
  //         "workerConfig":{
  //           "numInstances":"5"
  //         }
  //       }
  //     }
  //
  // Similarly, to change the number of preemptible workers in a cluster to 5, the
  // <code>update_mask</code> parameter would be <code>config.secondary_worker_config.num_instances</code>,
  // and the `PATCH` request body would be set as follows:
  //
  //     {
  //       "config":{
  //         "secondaryWorkerConfig":{
  //           "numInstances":"5"
  //         }
  //       }
  //     }
  //
  // <strong>Note:</strong> Currently, <code>config.worker_config.num_instances</code>
  // and <code>config.secondary_worker_config.num_instances</code> are the only
  // fields that can be updated.
  google.protobuf.FieldMask update_mask = 4;
}
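
// A sketch of the worker-resize update described above, using the assumed
// `google-cloud-dataproc` Python client (`client` as constructed earlier;
// project, region, and cluster name are placeholders):
//
//     operation = client.update_cluster(
//         request={
//             "project_id": "my-project",
//             "region": "us-central1",
//             "cluster_name": "example-cluster",
//             "cluster": {"config": {"worker_config": {"num_instances": 5}}},
//             "update_mask": {"paths": ["config.worker_config.num_instances"]},
//         }
//     )
//     operation.result()  # block until the resize completes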

// A request to delete a cluster.
message DeleteClusterRequest {
  // [Required] The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1;

  // [Required] The Cloud Dataproc region in which to handle the request.
  string region = 3;

  // [Required] The cluster name.
  string cluster_name = 2;
}

// Request to get the resource representation for a cluster in a project.
message GetClusterRequest {
  // [Required] The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1;

  // [Required] The Cloud Dataproc region in which to handle the request.
  string region = 3;

  // [Required] The cluster name.
  string cluster_name = 2;
}

// A request to list the clusters in a project.
message ListClustersRequest {
  // [Required] The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1;

  // [Required] The Cloud Dataproc region in which to handle the request.
  string region = 4;

  // [Optional] The standard List page size.
  int32 page_size = 2;

  // [Optional] The standard List page token.
  string page_token = 3;
}

// The list of all clusters in a project.
message ListClustersResponse {
  // [Output-only] The clusters in the project.
  repeated Cluster clusters = 1;

  // [Output-only] This token is included in the response if there are more
  // results to fetch. To fetch additional results, provide this value as the
  // `page_token` in a subsequent <code>ListClustersRequest</code>.
  string next_page_token = 2;
}
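
// With the assumed Python client, each page yielded by the pager is one
// `ListClustersResponse`, so the token handoff can be observed directly
// (placeholder project and region):
//
//     pager = client.list_clusters(
//         request={"project_id": "my-project", "region": "us-central1", "page_size": 10}
//     )
//     for page in pager.pages:
//         # an empty next_page_token means this is the last page
//         print(len(page.clusters), "clusters; next token:", page.next_page_token or "<none>")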

// A request to collect cluster diagnostic information.
message DiagnoseClusterRequest {
  // [Required] The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1;

  // [Required] The Cloud Dataproc region in which to handle the request.
  string region = 3;

  // [Required] The cluster name.
  string cluster_name = 2;
}

// The location of diagnostic output.
message DiagnoseClusterResults {
  // [Output-only] The Google Cloud Storage URI of the diagnostic output.
  // The output report is a plain text file with a summary of collected
  // diagnostics.
  string output_uri = 1;
}