// blob: 1ac71e175cbcf84cdfa9c7db0d5f3aa9d8ae045f [file] [log] [blame]
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.ml.v1beta1;
import "google/api/annotations.proto";
import "google/api/auth.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";
option go_package = "google.golang.org/genproto/googleapis/cloud/ml/v1beta1;ml";
option java_multiple_files = true;
option java_outer_classname = "JobServiceProto";
option java_package = "com.google.cloud.ml.api.v1beta1";
// Copyright 2016 Google Inc. All Rights Reserved.
//
// Proto file for the Google Cloud Machine Learning Engine.
// Describes the 'job service' to manage training and prediction jobs.
// Service for creating and managing training and batch prediction jobs.
service JobService {
  // Creates a training job or a batch prediction job.
  rpc CreateJob(CreateJobRequest) returns (Job) {
    option (google.api.http) = {
      post: "/v1beta1/{parent=projects/*}/jobs"
      body: "job"
    };
  }

  // Lists the jobs in the project.
  rpc ListJobs(ListJobsRequest) returns (ListJobsResponse) {
    option (google.api.http) = {
      get: "/v1beta1/{parent=projects/*}/jobs"
    };
  }

  // Describes a single job.
  rpc GetJob(GetJobRequest) returns (Job) {
    option (google.api.http) = {
      get: "/v1beta1/{name=projects/*/jobs/*}"
    };
  }

  // Cancels a job that is running.
  rpc CancelJob(CancelJobRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      post: "/v1beta1/{name=projects/*/jobs/*}:cancel"
      body: "*"
    };
  }
}
// Input parameters that describe a training job.
message TrainingInput {
  // An abstract representation of the resources Cloud ML will allocate to a
  // training job. Consider the size of your training dataset and the
  // complexity of your model when you select a scale tier. As the tiers
  // increase, virtual machines are added to handle your job, and the
  // individual machines in the cluster generally have more memory and
  // greater processing power than they do at lower tiers. The number of
  // training units charged per hour of processing increases as tiers get
  // more advanced. Refer to the [pricing guide](/ml/pricing) for more
  // details. Note that in addition to incurring costs, your use of training
  // resources is constrained by the [quota policy](/ml/quota).
  enum ScaleTier {
    // A single worker instance. This tier is suitable for learning how to
    // use Cloud ML, and for experimenting with new models using small
    // datasets.
    BASIC = 0;

    // Many workers and a few parameter servers.
    STANDARD_1 = 1;

    // A large number of workers with many parameter servers.
    PREMIUM_1 = 3;

    // A single worker instance
    // [with a GPU](/ml/docs/how-tos/using-gpus).
    BASIC_GPU = 6;

    // Not a set tier; CUSTOM lets you use your own cluster specification.
    // When you use this tier, set values to configure your processing
    // cluster according to these guidelines:
    //
    // *   You _must_ set `TrainingInput.master_type` to specify the type
    //     of machine to use for your master node. This is the only required
    //     setting.
    //
    // *   You _may_ set `TrainingInput.worker_count` to specify the number
    //     of workers to use. If you specify one or more workers, you _must_
    //     also set `TrainingInput.worker_type` to specify the type of
    //     machine to use for your worker nodes.
    //
    // *   You _may_ set `TrainingInput.parameter_server_count` to specify
    //     the number of parameter servers to use. If you specify one or
    //     more parameter servers, you _must_ also set
    //     `TrainingInput.parameter_server_type` to specify the type of
    //     machine to use for your parameter servers.
    //
    // Note that all of your workers must use the same machine type, which
    // can be different from your parameter server type and master type.
    // Your parameter servers must likewise use the same machine type, which
    // can be different from your worker type and master type.
    CUSTOM = 5;
  }

  // Required. Specifies the machine types, the number of replicas for
  // workers and parameter servers.
  ScaleTier scale_tier = 1;

  // Optional. The type of virtual machine to use for your training job's
  // master worker.
  //
  // The following types are supported:
  //
  // <dl>
  //   <dt>standard</dt>
  //   <dd>
  //   A basic machine configuration suitable for training simple models
  //   with small to moderate datasets.
  //   </dd>
  //   <dt>large_model</dt>
  //   <dd>
  //   A machine with a lot of memory, specially suited for parameter
  //   servers when your model is large (having many hidden layers or layers
  //   with very large numbers of nodes).
  //   </dd>
  //   <dt>complex_model_s</dt>
  //   <dd>
  //   A machine suitable for the master and workers of the cluster when
  //   your model requires more computation than the standard machine can
  //   handle satisfactorily.
  //   </dd>
  //   <dt>complex_model_m</dt>
  //   <dd>
  //   A machine with roughly twice the number of cores and roughly double
  //   the memory of <code suppresswarning="true">complex_model_s</code>.
  //   </dd>
  //   <dt>complex_model_l</dt>
  //   <dd>
  //   A machine with roughly twice the number of cores and roughly double
  //   the memory of <code suppresswarning="true">complex_model_m</code>.
  //   </dd>
  //   <dt>standard_gpu</dt>
  //   <dd>
  //   A machine equivalent to
  //   <code suppresswarning="true">standard</code> that also includes a
  //   <a href="/ml/docs/how-tos/using-gpus">
  //   GPU that you can use in your trainer</a>.
  //   </dd>
  //   <dt>complex_model_m_gpu</dt>
  //   <dd>
  //   A machine equivalent to
  //   <code suppresswarning="true">complex_model_m</code> that also
  //   includes four GPUs.
  //   </dd>
  // </dl>
  //
  // You must set this value when `scale_tier` is set to `CUSTOM`.
  string master_type = 2;

  // Optional. The type of virtual machine to use for your training job's
  // worker nodes.
  //
  // The supported values are the same as those described in the entry for
  // `master_type`.
  //
  // This value must be present when `scale_tier` is set to `CUSTOM` and
  // `worker_count` is greater than zero.
  string worker_type = 3;

  // Optional. The type of virtual machine to use for your training job's
  // parameter server.
  //
  // The supported values are the same as those described in the entry for
  // `master_type`.
  //
  // This value must be present when `scale_tier` is set to `CUSTOM` and
  // `parameter_server_count` is greater than zero.
  string parameter_server_type = 4;

  // Optional. The number of worker replicas to use for the training job.
  // Each replica in the cluster will be of the type specified in
  // `worker_type`.
  //
  // This value can only be used when `scale_tier` is set to `CUSTOM`. If
  // you set this value, you must also set `worker_type`.
  int64 worker_count = 5;

  // Optional. The number of parameter server replicas to use for the
  // training job. Each replica in the cluster will be of the type specified
  // in `parameter_server_type`.
  //
  // This value can only be used when `scale_tier` is set to `CUSTOM`. If
  // you set this value, you must also set `parameter_server_type`.
  int64 parameter_server_count = 6;

  // Required. The Google Cloud Storage location of the packages with the
  // training program and any additional dependencies.
  repeated string package_uris = 7;

  // Required. The Python module name to run after installing the packages.
  string python_module = 8;

  // Optional. Command line arguments to pass to the program.
  repeated string args = 10;

  // Optional. The set of hyperparameters to tune.
  HyperparameterSpec hyperparameters = 12;

  // Required. The Google Compute Engine region to run the training job in.
  string region = 14;

  // Optional. A Google Cloud Storage path in which to store training
  // outputs and other data needed for training. This path is passed to your
  // TensorFlow program as the 'job_dir' command-line argument. The benefit
  // of specifying this field is that Cloud ML validates the path for use in
  // training.
  string job_dir = 16;

  // Optional. The Google Cloud ML runtime version to use for training. If
  // not set, Google Cloud ML will choose the latest stable version.
  string runtime_version = 15;
}
// A set of hyperparameters to optimize.
message HyperparameterSpec {
  // The optimization goals that are available.
  enum GoalType {
    // Goal type will default to maximize.
    GOAL_TYPE_UNSPECIFIED = 0;

    // Maximize the goal metric.
    MAXIMIZE = 1;

    // Minimize the goal metric.
    MINIMIZE = 2;
  }

  // Required. The type of goal to use for tuning. Available types are
  // `MAXIMIZE` and `MINIMIZE`.
  //
  // Defaults to `MAXIMIZE`.
  GoalType goal = 1;

  // Required. The set of parameters to tune.
  repeated ParameterSpec params = 2;

  // Optional. How many training trials should be attempted to optimize the
  // specified hyperparameters.
  //
  // Defaults to one.
  int32 max_trials = 3;

  // Optional. The number of training trials to run concurrently.
  // You can reduce the time it takes to perform hyperparameter tuning by
  // adding trials in parallel. However, each trial only benefits from the
  // information gained in completed trials. That means that a trial does not
  // get access to the results of trials running at the same time, which
  // could reduce the quality of the overall optimization.
  //
  // Each trial will use the same scale tier and machine types.
  //
  // Defaults to one.
  int32 max_parallel_trials = 4;

  // Optional. The TensorFlow summary tag name to use for optimizing trials.
  // For current versions of TensorFlow, this tag name should exactly match
  // what is shown in TensorBoard, including all scopes. For versions of
  // TensorFlow prior to 0.12, this should be only the tag passed to
  // tf.Summary.
  // By default, "training/hptuning/metric" will be used.
  string hyperparameter_metric_tag = 5;
}
// A single hyperparameter to optimize.
message ParameterSpec {
  // The type of the parameter.
  enum ParameterType {
    // You must specify a valid type. Using this unspecified type will
    // result in an error.
    PARAMETER_TYPE_UNSPECIFIED = 0;

    // Type for real-valued parameters.
    DOUBLE = 1;

    // Type for integral parameters.
    INTEGER = 2;

    // The parameter is categorical, with a value chosen from the
    // `categorical_values` field.
    CATEGORICAL = 3;

    // The parameter is real valued, with a fixed set of feasible points. If
    // `type==DISCRETE`, `discrete_values` must be provided, and
    // {`min_value`, `max_value`} will be ignored.
    DISCRETE = 4;
  }

  // The type of scaling that should be applied to this parameter.
  enum ScaleType {
    // By default, no scaling is applied.
    NONE = 0;

    // Scales the feasible space to (0, 1) linearly.
    UNIT_LINEAR_SCALE = 1;

    // Scales the feasible space logarithmically to (0, 1). The entire
    // feasible space must be strictly positive.
    UNIT_LOG_SCALE = 2;

    // Scales the feasible space "reverse" logarithmically to (0, 1). The
    // result is that values close to the top of the feasible space are
    // spread out more than points near the bottom. The entire feasible
    // space must be strictly positive.
    UNIT_REVERSE_LOG_SCALE = 3;
  }

  // Required. The parameter name must be unique amongst all `ParameterSpec`
  // messages in a HyperparameterSpec message. E.g., "learning_rate".
  string parameter_name = 1;

  // Required. The type of the parameter.
  ParameterType type = 4;

  // Required if type is `DOUBLE` or `INTEGER`. This field should be unset
  // if type is `CATEGORICAL`. This value should be an integer if type is
  // `INTEGER`.
  double min_value = 2;

  // Required if type is `DOUBLE` or `INTEGER`. This field should be unset
  // if type is `CATEGORICAL`. This value should be an integer if type is
  // `INTEGER`.
  double max_value = 3;

  // Required if type is `CATEGORICAL`. The list of possible categories.
  repeated string categorical_values = 5;

  // Required if type is `DISCRETE`.
  // A list of feasible points.
  // The list should be in strictly increasing order. For instance, this
  // parameter might have possible settings of 1.5, 2.5, and 4.0. This list
  // should not contain more than 1,000 values.
  repeated double discrete_values = 6;

  // Optional. How the parameter should be scaled to the hypercube.
  // Leave unset for categorical parameters.
  // Some kind of scaling is strongly recommended for real or integral
  // parameters (e.g., `UNIT_LINEAR_SCALE`).
  ScaleType scale_type = 7;
}
// The result of a single hyperparameter tuning trial from a training job.
// The TrainingOutput object that is returned on successful completion of a
// training job with hyperparameter tuning includes a list of
// HyperparameterOutput objects, one for each successful trial.
message HyperparameterOutput {
  // An observed value of a metric.
  message HyperparameterMetric {
    // The global training step for this metric.
    int64 training_step = 1;

    // The objective value at this training step.
    double objective_value = 2;
  }

  // The trial id for these results.
  string trial_id = 1;

  // The hyperparameters given to this trial.
  map<string, string> hyperparameters = 2;

  // The final objective metric seen for this trial.
  HyperparameterMetric final_metric = 3;

  // All recorded objective metrics for this trial.
  repeated HyperparameterMetric all_metrics = 4;
}
// Results of a training job. Output only.
message TrainingOutput {
  // The number of hyperparameter tuning trials that completed successfully.
  // Only set for hyperparameter tuning jobs.
  int64 completed_trial_count = 1;

  // Results for individual hyperparameter trials.
  // Only set for hyperparameter tuning jobs.
  repeated HyperparameterOutput trials = 2;

  // The amount of ML units consumed by the job.
  double consumed_ml_units = 3;

  // Whether this job is a hyperparameter tuning job.
  bool is_hyperparameter_tuning_job = 4;
}
// Input parameters that describe a prediction job.
message PredictionInput {
  // The format used to separate data instances in the source files.
  enum DataFormat {
    // Unspecified format.
    DATA_FORMAT_UNSPECIFIED = 0;

    // The source file is a text file with instances separated by the
    // new-line character.
    TEXT = 1;

    // The source file is a TFRecord file.
    TF_RECORD = 2;

    // The source file is a GZIP-compressed TFRecord file.
    TF_RECORD_GZIP = 3;
  }

  // Required. The model or the version to use for prediction.
  oneof model_version {
    // Use this field if you want to use the default version for the
    // specified model. The string must use the following format:
    //
    // `"projects/<var>[YOUR_PROJECT]</var>/models/<var>[YOUR_MODEL]</var>"`
    string model_name = 1;

    // Use this field if you want to specify a version of the model to use.
    // The string is formatted the same way as `model_name`, with the
    // addition of the version information:
    //
    // `"projects/<var>[YOUR_PROJECT]</var>/models/<var>[YOUR_MODEL]</var>/versions/<var>[YOUR_VERSION]</var>"`
    string version_name = 2;

    // Use this field if you want to specify a Google Cloud Storage path for
    // the model to use.
    string uri = 9;
  }

  // Required. The format of the input data files.
  DataFormat data_format = 3;

  // Required. The Google Cloud Storage location of the input data files.
  // May contain wildcards.
  repeated string input_paths = 4;

  // Required. The output Google Cloud Storage location.
  string output_path = 5;

  // Optional. The maximum number of workers to be used for parallel
  // processing. Defaults to 10 if not specified.
  int64 max_worker_count = 6;

  // Required. The Google Compute Engine region to run the prediction job
  // in.
  string region = 7;

  // Optional. The Google Cloud ML runtime version to use for this batch
  // prediction. If not set, Google Cloud ML will pick the runtime version
  // used during the CreateVersion request for this model version, or choose
  // the latest stable version when model version information is not
  // available such as when the model is specified by uri.
  string runtime_version = 8;
}
// Results of a prediction job.
message PredictionOutput {
  // The output Google Cloud Storage location provided at the job creation
  // time.
  string output_path = 1;

  // The number of generated predictions.
  int64 prediction_count = 2;

  // The number of data instances which resulted in errors.
  int64 error_count = 3;

  // Node hours used by the batch prediction job.
  double node_hours = 4;
}
// A training or prediction job.
message Job {
  // Describes the job state.
  enum State {
    // The job state is unspecified.
    STATE_UNSPECIFIED = 0;

    // The job has been just created and processing has not yet begun.
    QUEUED = 1;

    // The service is preparing to run the job.
    PREPARING = 2;

    // The job is in progress.
    RUNNING = 3;

    // The job completed successfully.
    SUCCEEDED = 4;

    // The job failed.
    // `error_message` should contain the details of the failure.
    FAILED = 5;

    // The job is being cancelled.
    // `error_message` should describe the reason for the cancellation.
    CANCELLING = 6;

    // The job has been cancelled.
    // `error_message` should describe the reason for the cancellation.
    CANCELLED = 7;
  }

  // Required. The user-specified id of the job.
  string job_id = 1;

  // Required. Parameters to create a job.
  oneof input {
    // Input parameters to create a training job.
    TrainingInput training_input = 2;

    // Input parameters to create a prediction job.
    PredictionInput prediction_input = 3;
  }

  // Output only. When the job was created.
  google.protobuf.Timestamp create_time = 4;

  // Output only. When the job processing was started.
  google.protobuf.Timestamp start_time = 5;

  // Output only. When the job processing was completed.
  google.protobuf.Timestamp end_time = 6;

  // Output only. The detailed state of a job.
  State state = 7;

  // Output only. The details of a failure or a cancellation.
  string error_message = 8;

  // Output only. The current result of the job.
  oneof output {
    // The current training job result.
    TrainingOutput training_output = 9;

    // The current prediction job result.
    PredictionOutput prediction_output = 10;
  }
}
// Request message for the CreateJob method.
message CreateJobRequest {
  // Required. The project name.
  //
  // Authorization: requires `Editor` role on the specified project.
  string parent = 1;

  // Required. The job to create.
  Job job = 2;
}
// Request message for the ListJobs method.
message ListJobsRequest {
  // Required. The name of the project for which to list jobs.
  //
  // Authorization: requires `Viewer` role on the specified project.
  string parent = 1;

  // Optional. Specifies the subset of jobs to retrieve.
  string filter = 2;

  // Optional. A page token to request the next page of results.
  //
  // You get the token from the `next_page_token` field of the response from
  // the previous call.
  string page_token = 4;

  // Optional. The number of jobs to retrieve per "page" of results. If
  // there are more remaining results than this number, the response message
  // will contain a valid value in the `next_page_token` field.
  //
  // The default value is 20, and the maximum page size is 100.
  int32 page_size = 5;
}
// Response message for the ListJobs method.
message ListJobsResponse {
  // The list of jobs.
  repeated Job jobs = 1;

  // Optional. Pass this token as the `page_token` field of the request for
  // a subsequent call.
  string next_page_token = 2;
}
// Request message for the GetJob method.
message GetJobRequest {
  // Required. The name of the job to get the description of.
  //
  // Authorization: requires `Viewer` role on the parent project.
  string name = 1;
}
// Request message for the CancelJob method.
message CancelJobRequest {
  // Required. The name of the job to cancel.
  //
  // Authorization: requires `Editor` role on the parent project.
  string name = 1;
}