blob: 854ce9b972accd62dc9368a423dac92703c7fcc4 [file] [log] [blame]
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.dataproc.v1;
import "google/api/annotations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";
option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc";
option java_multiple_files = true;
option java_outer_classname = "JobsProto";
option java_package = "com.google.cloud.dataproc.v1";
// The JobController provides methods to manage jobs.
service JobController {
// Submits a job to a cluster.
rpc SubmitJob(SubmitJobRequest) returns (Job) {
option (google.api.http) = { post: "/v1/projects/{project_id}/regions/{region}/jobs:submit" body: "*" };
}
// Gets the resource representation for a job in a project.
rpc GetJob(GetJobRequest) returns (Job) {
option (google.api.http) = { get: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}" };
}
// Lists regions/{region}/jobs in a project.
rpc ListJobs(ListJobsRequest) returns (ListJobsResponse) {
option (google.api.http) = { get: "/v1/projects/{project_id}/regions/{region}/jobs" };
}
// Starts a job cancellation request. To access the job resource
// after cancellation, call
// [regions/{region}/jobs.list](/dataproc/reference/rest/v1/projects.regions.jobs/list) or
// [regions/{region}/jobs.get](/dataproc/reference/rest/v1/projects.regions.jobs/get).
rpc CancelJob(CancelJobRequest) returns (Job) {
option (google.api.http) = { post: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}:cancel" body: "*" };
}
// Deletes the job from the project. If the job is active, the delete fails,
// and the response returns `FAILED_PRECONDITION`.
rpc DeleteJob(DeleteJobRequest) returns (google.protobuf.Empty) {
option (google.api.http) = { delete: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}" };
}
}
// The runtime logging config of the job.
message LoggingConfig {
// The Log4j level for job execution. When running an
// [Apache Hive](http://hive.apache.org/) job, Cloud
// Dataproc configures the Hive client to an equivalent verbosity level.
enum Level {
// Level is unspecified. Use default level for log4j.
LEVEL_UNSPECIFIED = 0;
// Use ALL level for log4j.
ALL = 1;
// Use TRACE level for log4j.
TRACE = 2;
// Use DEBUG level for log4j.
DEBUG = 3;
// Use INFO level for log4j.
INFO = 4;
// Use WARN level for log4j.
WARN = 5;
// Use ERROR level for log4j.
ERROR = 6;
// Use FATAL level for log4j.
FATAL = 7;
// Turn off log4j.
OFF = 8;
}
// The per-package log levels for the driver. This may include
// "root" package name to configure rootLogger.
// Examples:
// 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
map<string, Level> driver_log_levels = 2;
}
// A Cloud Dataproc job for running
// [Apache Hadoop MapReduce](https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html)
// jobs on [Apache Hadoop YARN](https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/YARN.html).
message HadoopJob {
// [Required] Indicates the location of the driver's main class. Specify
// either the jar file that contains the main class or the main class name.
// To specify both, add the jar file to `jar_file_uris`, and then specify
// the main class name in this property.
oneof driver {
// The HCFS URI of the jar file containing the main class.
// Examples:
// 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar'
// 'hdfs:/tmp/test-samples/custom-wordcount.jar'
// 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
string main_jar_file_uri = 1;
// The name of the driver's main class. The jar file containing the class
// must be in the default CLASSPATH or specified in `jar_file_uris`.
string main_class = 2;
}
// [Optional] The arguments to pass to the driver. Do not
// include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as job
// properties, since a collision may occur that causes an incorrect job
// submission.
repeated string args = 3;
// [Optional] Jar file URIs to add to the CLASSPATHs of the
// Hadoop driver and tasks.
repeated string jar_file_uris = 4;
// [Optional] HCFS (Hadoop Compatible Filesystem) URIs of files to be copied
// to the working directory of Hadoop drivers and distributed tasks. Useful
// for naively parallel tasks.
repeated string file_uris = 5;
// [Optional] HCFS URIs of archives to be extracted in the working directory of
// Hadoop drivers and tasks. Supported file types:
// .jar, .tar, .tar.gz, .tgz, or .zip.
repeated string archive_uris = 6;
// [Optional] A mapping of property names to values, used to configure Hadoop.
// Properties that conflict with values set by the Cloud Dataproc API may be
// overwritten. Can include properties set in /etc/hadoop/conf/*-site and
// classes in user code.
map<string, string> properties = 7;
// [Optional] The runtime log config for job execution.
LoggingConfig logging_config = 8;
}
// A Cloud Dataproc job for running [Apache Spark](http://spark.apache.org/)
// applications on YARN.
message SparkJob {
// [Required] The specification of the main method to call to drive the job.
// Specify either the jar file that contains the main class or the main class
// name. To pass both a main jar and a main class in that jar, add the jar to
// `CommonJob.jar_file_uris`, and then specify the main class name in `main_class`.
oneof driver {
// The HCFS URI of the jar file that contains the main class.
string main_jar_file_uri = 1;
// The name of the driver's main class. The jar file that contains the class
// must be in the default CLASSPATH or specified in `jar_file_uris`.
string main_class = 2;
}
// [Optional] The arguments to pass to the driver. Do not include arguments,
// such as `--conf`, that can be set as job properties, since a collision may
// occur that causes an incorrect job submission.
repeated string args = 3;
// [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the
// Spark driver and tasks.
repeated string jar_file_uris = 4;
// [Optional] HCFS URIs of files to be copied to the working directory of
// Spark drivers and distributed tasks. Useful for naively parallel tasks.
repeated string file_uris = 5;
// [Optional] HCFS URIs of archives to be extracted in the working directory
// of Spark drivers and tasks. Supported file types:
// .jar, .tar, .tar.gz, .tgz, and .zip.
repeated string archive_uris = 6;
// [Optional] A mapping of property names to values, used to configure Spark.
// Properties that conflict with values set by the Cloud Dataproc API may be
// overwritten. Can include properties set in
// /etc/spark/conf/spark-defaults.conf and classes in user code.
map<string, string> properties = 7;
// [Optional] The runtime log config for job execution.
LoggingConfig logging_config = 8;
}
// A Cloud Dataproc job for running
// [Apache PySpark](https://spark.apache.org/docs/0.9.0/python-programming-guide.html)
// applications on YARN.
message PySparkJob {
// [Required] The HCFS URI of the main Python file to use as the driver. Must
// be a .py file.
string main_python_file_uri = 1;
// [Optional] The arguments to pass to the driver. Do not include arguments,
// such as `--conf`, that can be set as job properties, since a collision may
// occur that causes an incorrect job submission.
repeated string args = 2;
// [Optional] HCFS file URIs of Python files to pass to the PySpark
// framework. Supported file types: .py, .egg, and .zip.
repeated string python_file_uris = 3;
// [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the
// Python driver and tasks.
repeated string jar_file_uris = 4;
// [Optional] HCFS URIs of files to be copied to the working directory of
// Python drivers and distributed tasks. Useful for naively parallel tasks.
repeated string file_uris = 5;
// [Optional] HCFS URIs of archives to be extracted in the working directory of
// .jar, .tar, .tar.gz, .tgz, and .zip.
repeated string archive_uris = 6;
// [Optional] A mapping of property names to values, used to configure PySpark.
// Properties that conflict with values set by the Cloud Dataproc API may be
// overwritten. Can include properties set in
// /etc/spark/conf/spark-defaults.conf and classes in user code.
map<string, string> properties = 7;
// [Optional] The runtime log config for job execution.
LoggingConfig logging_config = 8;
}
// A list of queries to run on a cluster.
message QueryList {
// [Required] The queries to execute. You do not need to terminate a query
// with a semicolon. Multiple queries can be specified in one string
// by separating each with a semicolon. Here is an example of an Cloud
// Dataproc API snippet that uses a QueryList to specify a HiveJob:
//
// "hiveJob": {
// "queryList": {
// "queries": [
// "query1",
// "query2",
// "query3;query4",
// ]
// }
// }
repeated string queries = 1;
}
// A Cloud Dataproc job for running [Apache Hive](https://hive.apache.org/)
// queries on YARN.
message HiveJob {
// [Required] The sequence of Hive queries to execute, specified as either
// an HCFS file URI or a list of queries.
oneof queries {
// The HCFS URI of the script that contains Hive queries.
string query_file_uri = 1;
// A list of queries.
QueryList query_list = 2;
}
// [Optional] Whether to continue executing queries if a query fails.
// The default value is `false`. Setting to `true` can be useful when executing
// independent parallel queries.
bool continue_on_failure = 3;
// [Optional] Mapping of query variable names to values (equivalent to the
// Hive command: `SET name="value";`).
map<string, string> script_variables = 4;
// [Optional] A mapping of property names and values, used to configure Hive.
// Properties that conflict with values set by the Cloud Dataproc API may be
// overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml,
// /etc/hive/conf/hive-site.xml, and classes in user code.
map<string, string> properties = 5;
// [Optional] HCFS URIs of jar files to add to the CLASSPATH of the
// Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes
// and UDFs.
repeated string jar_file_uris = 6;
}
// A Cloud Dataproc job for running [Apache Spark SQL](http://spark.apache.org/sql/)
// queries.
message SparkSqlJob {
// [Required] The sequence of Spark SQL queries to execute, specified as
// either an HCFS file URI or as a list of queries.
oneof queries {
// The HCFS URI of the script that contains SQL queries.
string query_file_uri = 1;
// A list of queries.
QueryList query_list = 2;
}
// [Optional] Mapping of query variable names to values (equivalent to the
// Spark SQL command: SET `name="value";`).
map<string, string> script_variables = 3;
// [Optional] A mapping of property names to values, used to configure
// Spark SQL's SparkConf. Properties that conflict with values set by the
// Cloud Dataproc API may be overwritten.
map<string, string> properties = 4;
// [Optional] HCFS URIs of jar files to be added to the Spark CLASSPATH.
repeated string jar_file_uris = 56;
// [Optional] The runtime log config for job execution.
LoggingConfig logging_config = 6;
}
// A Cloud Dataproc job for running [Apache Pig](https://pig.apache.org/)
// queries on YARN.
message PigJob {
// [Required] The sequence of Pig queries to execute, specified as an HCFS
// file URI or a list of queries.
oneof queries {
// The HCFS URI of the script that contains the Pig queries.
string query_file_uri = 1;
// A list of queries.
QueryList query_list = 2;
}
// [Optional] Whether to continue executing queries if a query fails.
// The default value is `false`. Setting to `true` can be useful when executing
// independent parallel queries.
bool continue_on_failure = 3;
// [Optional] Mapping of query variable names to values (equivalent to the Pig
// command: `name=[value]`).
map<string, string> script_variables = 4;
// [Optional] A mapping of property names to values, used to configure Pig.
// Properties that conflict with values set by the Cloud Dataproc API may be
// overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml,
// /etc/pig/conf/pig.properties, and classes in user code.
map<string, string> properties = 5;
// [Optional] HCFS URIs of jar files to add to the CLASSPATH of
// the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs.
repeated string jar_file_uris = 6;
// [Optional] The runtime log config for job execution.
LoggingConfig logging_config = 7;
}
// Cloud Dataproc job config.
message JobPlacement {
// [Required] The name of the cluster where the job will be submitted.
string cluster_name = 1;
// [Output-only] A cluster UUID generated by the Cloud Dataproc service when
// the job is submitted.
string cluster_uuid = 2;
}
// Cloud Dataproc job status.
message JobStatus {
// The job state.
enum State {
// The job state is unknown.
STATE_UNSPECIFIED = 0;
// The job is pending; it has been submitted, but is not yet running.
PENDING = 1;
// Job has been received by the service and completed initial setup;
// it will soon be submitted to the cluster.
SETUP_DONE = 8;
// The job is running on the cluster.
RUNNING = 2;
// A CancelJob request has been received, but is pending.
CANCEL_PENDING = 3;
// Transient in-flight resources have been canceled, and the request to
// cancel the running job has been issued to the cluster.
CANCEL_STARTED = 7;
// The job cancellation was successful.
CANCELLED = 4;
// The job has completed successfully.
DONE = 5;
// The job has completed, but encountered an error.
ERROR = 6;
}
// [Output-only] A state message specifying the overall job state.
State state = 1;
// [Output-only] Optional job state details, such as an error
// description if the state is <code>ERROR</code>.
string details = 2;
// [Output-only] The time when this state was entered.
google.protobuf.Timestamp state_start_time = 6;
}
// Encapsulates the full scoping used to reference a job.
message JobReference {
// [Required] The ID of the Google Cloud Platform project that the job
// belongs to.
string project_id = 1;
// [Optional] The job ID, which must be unique within the project. The job ID
// is generated by the server upon job submission or provided by the user as a
// means to perform retries without creating duplicate jobs. The ID must
// contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or
// hyphens (-). The maximum length is 512 characters.
string job_id = 2;
}
// A Cloud Dataproc job resource.
message Job {
// [Optional] The fully qualified reference to the job, which can be used to
// obtain the equivalent REST path of the job resource. If this property
// is not specified when a job is created, the server generates a
// <code>job_id</code>.
JobReference reference = 1;
// [Required] Job information, including how, when, and where to
// run the job.
JobPlacement placement = 2;
// [Required] The application/framework-specific portion of the job.
oneof type_job {
// Job is a Hadoop job.
HadoopJob hadoop_job = 3;
// Job is a Spark job.
SparkJob spark_job = 4;
// Job is a Pyspark job.
PySparkJob pyspark_job = 5;
// Job is a Hive job.
HiveJob hive_job = 6;
// Job is a Pig job.
PigJob pig_job = 7;
// Job is a SparkSql job.
SparkSqlJob spark_sql_job = 12;
}
// [Output-only] The job status. Additional application-specific
// status information may be contained in the <code>type_job</code>
// and <code>yarn_applications</code> fields.
JobStatus status = 8;
// [Output-only] The previous job status.
repeated JobStatus status_history = 13;
// [Output-only] A URI pointing to the location of the stdout of the job's
// driver program.
string driver_output_resource_uri = 17;
// [Output-only] If present, the location of miscellaneous control files
// which may be used as part of job setup and handling. If not present,
// control files may be placed in the same location as `driver_output_uri`.
string driver_control_files_uri = 15;
}
// A request to submit a job.
message SubmitJobRequest {
// [Required] The ID of the Google Cloud Platform project that the job
// belongs to.
string project_id = 1;
// [Required] The Cloud Dataproc region in which to handle the request.
string region = 3;
// [Required] The job resource.
Job job = 2;
}
// A request to get the resource representation for a job in a project.
message GetJobRequest {
// [Required] The ID of the Google Cloud Platform project that the job
// belongs to.
string project_id = 1;
// [Required] The Cloud Dataproc region in which to handle the request.
string region = 3;
// [Required] The job ID.
string job_id = 2;
}
// A request to list jobs in a project.
message ListJobsRequest {
// A matcher that specifies categories of job states.
enum JobStateMatcher {
// Match all jobs, regardless of state.
ALL = 0;
// Only match jobs in non-terminal states: PENDING, RUNNING, or
// CANCEL_PENDING.
ACTIVE = 1;
// Only match jobs in terminal states: CANCELLED, DONE, or ERROR.
NON_ACTIVE = 2;
}
// [Required] The ID of the Google Cloud Platform project that the job
// belongs to.
string project_id = 1;
// [Required] The Cloud Dataproc region in which to handle the request.
string region = 6;
// [Optional] The number of results to return in each response.
int32 page_size = 2;
// [Optional] The page token, returned by a previous call, to request the
// next page of results.
string page_token = 3;
// [Optional] If set, the returned jobs list includes only jobs that were
// submitted to the named cluster.
string cluster_name = 4;
// [Optional] Specifies enumerated categories of jobs to list
// (default = match ALL jobs).
JobStateMatcher job_state_matcher = 5;
}
// A list of jobs in a project.
message ListJobsResponse {
// [Output-only] Jobs list.
repeated Job jobs = 1;
// [Optional] This token is included in the response if there are more results
// to fetch. To fetch additional results, provide this value as the
// `page_token` in a subsequent <code>ListJobsRequest</code>.
string next_page_token = 2;
}
// A request to cancel a job.
message CancelJobRequest {
// [Required] The ID of the Google Cloud Platform project that the job
// belongs to.
string project_id = 1;
// [Required] The Cloud Dataproc region in which to handle the request.
string region = 3;
// [Required] The job ID.
string job_id = 2;
}
// A request to delete a job.
message DeleteJobRequest {
// [Required] The ID of the Google Cloud Platform project that the job
// belongs to.
string project_id = 1;
// [Required] The Cloud Dataproc region in which to handle the request.
string region = 3;
// [Required] The job ID.
string job_id = 2;
}