| // Copyright 2016 Google Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| syntax = "proto3"; |
| |
| package google.cloud.dataproc.v1; |
| |
| import "google/api/annotations.proto"; |
| import "google/protobuf/empty.proto"; |
| import "google/protobuf/timestamp.proto"; |
| |
| option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc"; |
| option java_multiple_files = true; |
| option java_outer_classname = "JobsProto"; |
| option java_package = "com.google.cloud.dataproc.v1"; |
| |
| |
| // The JobController provides methods to manage jobs. |
| service JobController { |
| // Submits a job to a cluster. |
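  //
  // As an illustrative sketch (the cluster name, bucket, and jar below are
  // placeholders, not values defined by this API), the request body for this
  // method might look like:
  //
  //     {
  //       "job": {
  //         "placement": { "clusterName": "example-cluster" },
  //         "hadoopJob": {
  //           "mainJarFileUri": "gs://example-bucket/wordcount.jar",
  //           "args": ["gs://example-bucket/input/", "gs://example-bucket/output/"]
  //         }
  //       }
  //     }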
| rpc SubmitJob(SubmitJobRequest) returns (Job) { |
| option (google.api.http) = { post: "/v1/projects/{project_id}/regions/{region}/jobs:submit" body: "*" }; |
| } |
| |
| // Gets the resource representation for a job in a project. |
| rpc GetJob(GetJobRequest) returns (Job) { |
| option (google.api.http) = { get: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}" }; |
| } |
| |
| // Lists regions/{region}/jobs in a project. |
| rpc ListJobs(ListJobsRequest) returns (ListJobsResponse) { |
| option (google.api.http) = { get: "/v1/projects/{project_id}/regions/{region}/jobs" }; |
| } |
| |
| // Starts a job cancellation request. To access the job resource |
| // after cancellation, call |
| // [regions/{region}/jobs.list](/dataproc/reference/rest/v1/projects.regions.jobs/list) or |
| // [regions/{region}/jobs.get](/dataproc/reference/rest/v1/projects.regions.jobs/get). |
| rpc CancelJob(CancelJobRequest) returns (Job) { |
| option (google.api.http) = { post: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}:cancel" body: "*" }; |
| } |
| |
| // Deletes the job from the project. If the job is active, the delete fails, |
| // and the response returns `FAILED_PRECONDITION`. |
| rpc DeleteJob(DeleteJobRequest) returns (google.protobuf.Empty) { |
| option (google.api.http) = { delete: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}" }; |
| } |
| } |
| |
| // The runtime logging config of the job. |
| message LoggingConfig { |
| // The Log4j level for job execution. When running an |
| // [Apache Hive](http://hive.apache.org/) job, Cloud |
| // Dataproc configures the Hive client to an equivalent verbosity level. |
| enum Level { |
| // Level is unspecified. Use default level for log4j. |
| LEVEL_UNSPECIFIED = 0; |
| |
| // Use ALL level for log4j. |
| ALL = 1; |
| |
| // Use TRACE level for log4j. |
| TRACE = 2; |
| |
| // Use DEBUG level for log4j. |
| DEBUG = 3; |
| |
| // Use INFO level for log4j. |
| INFO = 4; |
| |
| // Use WARN level for log4j. |
| WARN = 5; |
| |
| // Use ERROR level for log4j. |
| ERROR = 6; |
| |
| // Use FATAL level for log4j. |
| FATAL = 7; |
| |
| // Turn off log4j. |
| OFF = 8; |
| } |
| |
| // The per-package log levels for the driver. This may include |
| // "root" package name to configure rootLogger. |
| // Examples: |
| // 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG' |
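  //
  // As an illustrative sketch, the JSON form of this map in a job request
  // might look like:
  //
  //     "driverLogLevels": {
  //       "root": "INFO",
  //       "org.apache": "DEBUG"
  //     }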
| map<string, Level> driver_log_levels = 2; |
| } |
| |
| // A Cloud Dataproc job for running |
| // [Apache Hadoop MapReduce](https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html) |
| // jobs on [Apache Hadoop YARN](https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/YARN.html). |
| message HadoopJob { |
| // [Required] Indicates the location of the driver's main class. Specify |
| // either the jar file that contains the main class or the main class name. |
| // To specify both, add the jar file to `jar_file_uris`, and then specify |
| // the main class name in this property. |
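  //
  // As an illustrative sketch (the class and URI below are placeholders),
  // the second form looks like:
  //
  //     "mainClass": "org.example.WordCount",
  //     "jarFileUris": ["gs://example-bucket/wordcount.jar"]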
| oneof driver { |
| // The HCFS URI of the jar file containing the main class. |
| // Examples: |
| // 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' |
| // 'hdfs:/tmp/test-samples/custom-wordcount.jar' |
| // 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar' |
| string main_jar_file_uri = 1; |
| |
| // The name of the driver's main class. The jar file containing the class |
| // must be in the default CLASSPATH or specified in `jar_file_uris`. |
| string main_class = 2; |
| } |
| |
| // [Optional] The arguments to pass to the driver. Do not |
| // include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as job |
| // properties, since a collision may occur that causes an incorrect job |
| // submission. |
| repeated string args = 3; |
| |
| // [Optional] Jar file URIs to add to the CLASSPATHs of the |
| // Hadoop driver and tasks. |
| repeated string jar_file_uris = 4; |
| |
| // [Optional] HCFS (Hadoop Compatible Filesystem) URIs of files to be copied |
| // to the working directory of Hadoop drivers and distributed tasks. Useful |
| // for naively parallel tasks. |
| repeated string file_uris = 5; |
| |
| // [Optional] HCFS URIs of archives to be extracted in the working directory of |
| // Hadoop drivers and tasks. Supported file types: |
| // .jar, .tar, .tar.gz, .tgz, or .zip. |
| repeated string archive_uris = 6; |
| |
| // [Optional] A mapping of property names to values, used to configure Hadoop. |
| // Properties that conflict with values set by the Cloud Dataproc API may be |
| // overwritten. Can include properties set in /etc/hadoop/conf/*-site and |
| // classes in user code. |
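  //
  // As an illustrative sketch, a properties map might look like:
  //
  //     "properties": { "mapreduce.job.reduces": "4" }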
| map<string, string> properties = 7; |
| |
| // [Optional] The runtime log config for job execution. |
| LoggingConfig logging_config = 8; |
| } |
| |
| // A Cloud Dataproc job for running [Apache Spark](http://spark.apache.org/) |
| // applications on YARN. |
| message SparkJob { |
| // [Required] The specification of the main method to call to drive the job. |
| // Specify either the jar file that contains the main class or the main class |
| // name. To pass both a main jar and a main class in that jar, add the jar to |
  // `jar_file_uris`, and then specify the main class name in `main_class`.
| oneof driver { |
| // The HCFS URI of the jar file that contains the main class. |
| string main_jar_file_uri = 1; |
| |
| // The name of the driver's main class. The jar file that contains the class |
| // must be in the default CLASSPATH or specified in `jar_file_uris`. |
| string main_class = 2; |
| } |
| |
| // [Optional] The arguments to pass to the driver. Do not include arguments, |
| // such as `--conf`, that can be set as job properties, since a collision may |
| // occur that causes an incorrect job submission. |
| repeated string args = 3; |
| |
| // [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the |
| // Spark driver and tasks. |
| repeated string jar_file_uris = 4; |
| |
| // [Optional] HCFS URIs of files to be copied to the working directory of |
| // Spark drivers and distributed tasks. Useful for naively parallel tasks. |
| repeated string file_uris = 5; |
| |
| // [Optional] HCFS URIs of archives to be extracted in the working directory |
| // of Spark drivers and tasks. Supported file types: |
| // .jar, .tar, .tar.gz, .tgz, and .zip. |
| repeated string archive_uris = 6; |
| |
| // [Optional] A mapping of property names to values, used to configure Spark. |
| // Properties that conflict with values set by the Cloud Dataproc API may be |
| // overwritten. Can include properties set in |
| // /etc/spark/conf/spark-defaults.conf and classes in user code. |
| map<string, string> properties = 7; |
| |
| // [Optional] The runtime log config for job execution. |
| LoggingConfig logging_config = 8; |
| } |
| |
| // A Cloud Dataproc job for running |
| // [Apache PySpark](https://spark.apache.org/docs/0.9.0/python-programming-guide.html) |
| // applications on YARN. |
| message PySparkJob { |
| // [Required] The HCFS URI of the main Python file to use as the driver. Must |
| // be a .py file. |
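  // Example (an illustrative placeholder URI):
  // 'gs://example-bucket/scripts/word-count.py'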
| string main_python_file_uri = 1; |
| |
| // [Optional] The arguments to pass to the driver. Do not include arguments, |
| // such as `--conf`, that can be set as job properties, since a collision may |
| // occur that causes an incorrect job submission. |
| repeated string args = 2; |
| |
| // [Optional] HCFS file URIs of Python files to pass to the PySpark |
| // framework. Supported file types: .py, .egg, and .zip. |
| repeated string python_file_uris = 3; |
| |
| // [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the |
| // Python driver and tasks. |
| repeated string jar_file_uris = 4; |
| |
| // [Optional] HCFS URIs of files to be copied to the working directory of |
| // Python drivers and distributed tasks. Useful for naively parallel tasks. |
| repeated string file_uris = 5; |
| |
  // [Optional] HCFS URIs of archives to be extracted in the working directory of
  // Python drivers and tasks. Supported file types:
  // .jar, .tar, .tar.gz, .tgz, and .zip.
| repeated string archive_uris = 6; |
| |
| // [Optional] A mapping of property names to values, used to configure PySpark. |
| // Properties that conflict with values set by the Cloud Dataproc API may be |
| // overwritten. Can include properties set in |
| // /etc/spark/conf/spark-defaults.conf and classes in user code. |
| map<string, string> properties = 7; |
| |
| // [Optional] The runtime log config for job execution. |
| LoggingConfig logging_config = 8; |
| } |
| |
| // A list of queries to run on a cluster. |
| message QueryList { |
| // [Required] The queries to execute. You do not need to terminate a query |
| // with a semicolon. Multiple queries can be specified in one string |
  // by separating each with a semicolon. Here is an example of a Cloud
| // Dataproc API snippet that uses a QueryList to specify a HiveJob: |
| // |
| // "hiveJob": { |
| // "queryList": { |
| // "queries": [ |
| // "query1", |
| // "query2", |
| // "query3;query4", |
| // ] |
| // } |
| // } |
| repeated string queries = 1; |
| } |
| |
| // A Cloud Dataproc job for running [Apache Hive](https://hive.apache.org/) |
| // queries on YARN. |
| message HiveJob { |
| // [Required] The sequence of Hive queries to execute, specified as either |
| // an HCFS file URI or a list of queries. |
| oneof queries { |
| // The HCFS URI of the script that contains Hive queries. |
| string query_file_uri = 1; |
| |
| // A list of queries. |
| QueryList query_list = 2; |
| } |
| |
| // [Optional] Whether to continue executing queries if a query fails. |
| // The default value is `false`. Setting to `true` can be useful when executing |
| // independent parallel queries. |
| bool continue_on_failure = 3; |
| |
| // [Optional] Mapping of query variable names to values (equivalent to the |
| // Hive command: `SET name="value";`). |
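  //
  // As an illustrative sketch, a map whose JSON form is:
  //
  //     "scriptVariables": { "database": "example_db" }
  //
  // behaves as if `SET database="example_db";` had been run before the queries
  // (`example_db` is a placeholder value).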
| map<string, string> script_variables = 4; |
| |
| // [Optional] A mapping of property names and values, used to configure Hive. |
| // Properties that conflict with values set by the Cloud Dataproc API may be |
| // overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, |
| // /etc/hive/conf/hive-site.xml, and classes in user code. |
| map<string, string> properties = 5; |
| |
| // [Optional] HCFS URIs of jar files to add to the CLASSPATH of the |
| // Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes |
| // and UDFs. |
| repeated string jar_file_uris = 6; |
| } |
| |
| // A Cloud Dataproc job for running [Apache Spark SQL](http://spark.apache.org/sql/) |
| // queries. |
| message SparkSqlJob { |
| // [Required] The sequence of Spark SQL queries to execute, specified as |
| // either an HCFS file URI or as a list of queries. |
| oneof queries { |
| // The HCFS URI of the script that contains SQL queries. |
| string query_file_uri = 1; |
| |
| // A list of queries. |
| QueryList query_list = 2; |
| } |
| |
| // [Optional] Mapping of query variable names to values (equivalent to the |
  // Spark SQL command: `SET name="value";`).
| map<string, string> script_variables = 3; |
| |
| // [Optional] A mapping of property names to values, used to configure |
| // Spark SQL's SparkConf. Properties that conflict with values set by the |
| // Cloud Dataproc API may be overwritten. |
| map<string, string> properties = 4; |
| |
| // [Optional] HCFS URIs of jar files to be added to the Spark CLASSPATH. |
| repeated string jar_file_uris = 56; |
| |
| // [Optional] The runtime log config for job execution. |
| LoggingConfig logging_config = 6; |
| } |
| |
| // A Cloud Dataproc job for running [Apache Pig](https://pig.apache.org/) |
| // queries on YARN. |
| message PigJob { |
| // [Required] The sequence of Pig queries to execute, specified as an HCFS |
| // file URI or a list of queries. |
| oneof queries { |
| // The HCFS URI of the script that contains the Pig queries. |
| string query_file_uri = 1; |
| |
| // A list of queries. |
| QueryList query_list = 2; |
| } |
| |
| // [Optional] Whether to continue executing queries if a query fails. |
| // The default value is `false`. Setting to `true` can be useful when executing |
| // independent parallel queries. |
| bool continue_on_failure = 3; |
| |
| // [Optional] Mapping of query variable names to values (equivalent to the Pig |
| // command: `name=[value]`). |
| map<string, string> script_variables = 4; |
| |
| // [Optional] A mapping of property names to values, used to configure Pig. |
| // Properties that conflict with values set by the Cloud Dataproc API may be |
| // overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, |
| // /etc/pig/conf/pig.properties, and classes in user code. |
| map<string, string> properties = 5; |
| |
| // [Optional] HCFS URIs of jar files to add to the CLASSPATH of |
| // the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs. |
| repeated string jar_file_uris = 6; |
| |
| // [Optional] The runtime log config for job execution. |
| LoggingConfig logging_config = 7; |
| } |
| |
// Cloud Dataproc job placement configuration.
| message JobPlacement { |
| // [Required] The name of the cluster where the job will be submitted. |
| string cluster_name = 1; |
| |
| // [Output-only] A cluster UUID generated by the Cloud Dataproc service when |
| // the job is submitted. |
| string cluster_uuid = 2; |
| } |
| |
| // Cloud Dataproc job status. |
| message JobStatus { |
| // The job state. |
| enum State { |
| // The job state is unknown. |
| STATE_UNSPECIFIED = 0; |
| |
| // The job is pending; it has been submitted, but is not yet running. |
| PENDING = 1; |
| |
| // Job has been received by the service and completed initial setup; |
| // it will soon be submitted to the cluster. |
| SETUP_DONE = 8; |
| |
| // The job is running on the cluster. |
| RUNNING = 2; |
| |
| // A CancelJob request has been received, but is pending. |
| CANCEL_PENDING = 3; |
| |
| // Transient in-flight resources have been canceled, and the request to |
| // cancel the running job has been issued to the cluster. |
| CANCEL_STARTED = 7; |
| |
| // The job cancellation was successful. |
| CANCELLED = 4; |
| |
| // The job has completed successfully. |
| DONE = 5; |
| |
| // The job has completed, but encountered an error. |
| ERROR = 6; |
| } |
| |
| // [Output-only] A state message specifying the overall job state. |
| State state = 1; |
| |
| // [Output-only] Optional job state details, such as an error |
| // description if the state is <code>ERROR</code>. |
| string details = 2; |
| |
| // [Output-only] The time when this state was entered. |
| google.protobuf.Timestamp state_start_time = 6; |
| } |
| |
| // Encapsulates the full scoping used to reference a job. |
| message JobReference { |
| // [Required] The ID of the Google Cloud Platform project that the job |
| // belongs to. |
| string project_id = 1; |
| |
| // [Optional] The job ID, which must be unique within the project. The job ID |
| // is generated by the server upon job submission or provided by the user as a |
| // means to perform retries without creating duplicate jobs. The ID must |
| // contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or |
| // hyphens (-). The maximum length is 512 characters. |
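  //
  // As an illustrative sketch, a client might supply an ID such as
  // `word-count-20161031-001` (a hypothetical value) and reuse it when
  // retrying the submission, so the retry does not create a duplicate job.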
| string job_id = 2; |
| } |
| |
| // A Cloud Dataproc job resource. |
| message Job { |
| // [Optional] The fully qualified reference to the job, which can be used to |
| // obtain the equivalent REST path of the job resource. If this property |
| // is not specified when a job is created, the server generates a |
| // <code>job_id</code>. |
| JobReference reference = 1; |
| |
| // [Required] Job information, including how, when, and where to |
| // run the job. |
| JobPlacement placement = 2; |
| |
| // [Required] The application/framework-specific portion of the job. |
| oneof type_job { |
| // Job is a Hadoop job. |
| HadoopJob hadoop_job = 3; |
| |
| // Job is a Spark job. |
| SparkJob spark_job = 4; |
| |
| // Job is a Pyspark job. |
| PySparkJob pyspark_job = 5; |
| |
| // Job is a Hive job. |
| HiveJob hive_job = 6; |
| |
| // Job is a Pig job. |
| PigJob pig_job = 7; |
| |
| // Job is a SparkSql job. |
| SparkSqlJob spark_sql_job = 12; |
| } |
| |
| // [Output-only] The job status. Additional application-specific |
| // status information may be contained in the <code>type_job</code> |
| // and <code>yarn_applications</code> fields. |
| JobStatus status = 8; |
| |
  // [Output-only] The previous job statuses.
| repeated JobStatus status_history = 13; |
| |
| // [Output-only] A URI pointing to the location of the stdout of the job's |
| // driver program. |
| string driver_output_resource_uri = 17; |
| |
| // [Output-only] If present, the location of miscellaneous control files |
| // which may be used as part of job setup and handling. If not present, |
  // control files may be placed in the same location as `driver_output_resource_uri`.
| string driver_control_files_uri = 15; |
| } |
| |
| // A request to submit a job. |
| message SubmitJobRequest { |
| // [Required] The ID of the Google Cloud Platform project that the job |
| // belongs to. |
| string project_id = 1; |
| |
| // [Required] The Cloud Dataproc region in which to handle the request. |
| string region = 3; |
| |
| // [Required] The job resource. |
| Job job = 2; |
| } |
| |
| // A request to get the resource representation for a job in a project. |
| message GetJobRequest { |
| // [Required] The ID of the Google Cloud Platform project that the job |
| // belongs to. |
| string project_id = 1; |
| |
| // [Required] The Cloud Dataproc region in which to handle the request. |
| string region = 3; |
| |
| // [Required] The job ID. |
| string job_id = 2; |
| } |
| |
| // A request to list jobs in a project. |
| message ListJobsRequest { |
| // A matcher that specifies categories of job states. |
| enum JobStateMatcher { |
| // Match all jobs, regardless of state. |
| ALL = 0; |
| |
| // Only match jobs in non-terminal states: PENDING, RUNNING, or |
| // CANCEL_PENDING. |
| ACTIVE = 1; |
| |
| // Only match jobs in terminal states: CANCELLED, DONE, or ERROR. |
| NON_ACTIVE = 2; |
| } |
| |
| // [Required] The ID of the Google Cloud Platform project that the job |
| // belongs to. |
| string project_id = 1; |
| |
| // [Required] The Cloud Dataproc region in which to handle the request. |
| string region = 6; |
| |
| // [Optional] The number of results to return in each response. |
| int32 page_size = 2; |
| |
| // [Optional] The page token, returned by a previous call, to request the |
| // next page of results. |
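  //
  // As an illustrative sketch of paging, the token from a prior response is
  // passed back unchanged in the next request:
  //
  //     first request:  { "pageSize": 100 }
  //     next request:   { "pageSize": 100, "pageToken": "<nextPageToken from the previous response>" }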
| string page_token = 3; |
| |
| // [Optional] If set, the returned jobs list includes only jobs that were |
| // submitted to the named cluster. |
| string cluster_name = 4; |
| |
| // [Optional] Specifies enumerated categories of jobs to list |
| // (default = match ALL jobs). |
| JobStateMatcher job_state_matcher = 5; |
| } |
| |
| // A list of jobs in a project. |
| message ListJobsResponse { |
| // [Output-only] Jobs list. |
| repeated Job jobs = 1; |
| |
| // [Optional] This token is included in the response if there are more results |
| // to fetch. To fetch additional results, provide this value as the |
| // `page_token` in a subsequent <code>ListJobsRequest</code>. |
| string next_page_token = 2; |
| } |
| |
| // A request to cancel a job. |
| message CancelJobRequest { |
| // [Required] The ID of the Google Cloud Platform project that the job |
| // belongs to. |
| string project_id = 1; |
| |
| // [Required] The Cloud Dataproc region in which to handle the request. |
| string region = 3; |
| |
| // [Required] The job ID. |
| string job_id = 2; |
| } |
| |
| // A request to delete a job. |
| message DeleteJobRequest { |
| // [Required] The ID of the Google Cloud Platform project that the job |
| // belongs to. |
| string project_id = 1; |
| |
| // [Required] The Cloud Dataproc region in which to handle the request. |
| string region = 3; |
| |
| // [Required] The job ID. |
| string job_id = 2; |
| } |