| // Copyright 2017 Google Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| syntax = "proto3"; |
| |
| package google.cloud.ml.v1beta1; |
| |
| import "google/api/annotations.proto"; |
| import "google/api/auth.proto"; |
| import "google/protobuf/empty.proto"; |
| import "google/protobuf/timestamp.proto"; |
| |
| option go_package = "google.golang.org/genproto/googleapis/cloud/ml/v1beta1;ml"; |
| option java_multiple_files = true; |
| option java_outer_classname = "JobServiceProto"; |
| option java_package = "com.google.cloud.ml.api.v1beta1"; |
| |
// Proto file for the Google Cloud Machine Learning Engine.
// Describes the 'job service' to manage training and prediction jobs.
| |
| // Service to create and manage training and batch prediction jobs. |
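//
// Each RPC is also exposed over REST through the `google.api.http` bindings
// below. For example, creating a job corresponds to a call like the
// following (the host and project name are shown for illustration only):
//
//     POST https://ml.googleapis.com/v1beta1/projects/my_project/jobs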
| service JobService { |
| // Creates a training or a batch prediction job. |
| rpc CreateJob(CreateJobRequest) returns (Job) { |
| option (google.api.http) = { post: "/v1beta1/{parent=projects/*}/jobs" body: "job" }; |
| } |
| |
| // Lists the jobs in the project. |
| rpc ListJobs(ListJobsRequest) returns (ListJobsResponse) { |
| option (google.api.http) = { get: "/v1beta1/{parent=projects/*}/jobs" }; |
| } |
| |
| // Describes a job. |
| rpc GetJob(GetJobRequest) returns (Job) { |
| option (google.api.http) = { get: "/v1beta1/{name=projects/*/jobs/*}" }; |
| } |
| |
| // Cancels a running job. |
| rpc CancelJob(CancelJobRequest) returns (google.protobuf.Empty) { |
| option (google.api.http) = { post: "/v1beta1/{name=projects/*/jobs/*}:cancel" body: "*" }; |
| } |
| } |
| |
| // Represents input parameters for a training job. |
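//
// For example, a minimal training job might carry a `TrainingInput` like
// this in JSON form (the bucket, package, and module names are illustrative
// only):
//
//     "trainingInput": {
//       "scaleTier": "BASIC",
//       "packageUris": ["gs://my_bucket/trainer-0.1.tar.gz"],
//       "pythonModule": "trainer.task",
//       "region": "us-central1"
//     }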
| message TrainingInput { |
| // A scale tier is an abstract representation of the resources Cloud ML |
| // will allocate to a training job. When selecting a scale tier for your |
| // training job, you should consider the size of your training dataset and |
| // the complexity of your model. As the tiers increase, virtual machines are |
| // added to handle your job, and the individual machines in the cluster |
| // generally have more memory and greater processing power than they do at |
| // lower tiers. The number of training units charged per hour of processing |
| // increases as tiers get more advanced. Refer to the |
| // [pricing guide](/ml/pricing) for more details. Note that in addition to |
| // incurring costs, your use of training resources is constrained by the |
| // [quota policy](/ml/quota). |
| enum ScaleTier { |
| // A single worker instance. This tier is suitable for learning how to use |
| // Cloud ML, and for experimenting with new models using small datasets. |
| BASIC = 0; |
| |
| // Many workers and a few parameter servers. |
| STANDARD_1 = 1; |
| |
| // A large number of workers with many parameter servers. |
| PREMIUM_1 = 3; |
| |
| // A single worker instance [with a GPU](ml/docs/how-tos/using-gpus). |
| BASIC_GPU = 6; |
| |
| // The CUSTOM tier is not a set tier, but rather enables you to use your |
| // own cluster specification. When you use this tier, set values to |
| // configure your processing cluster according to these guidelines: |
| // |
| // * You _must_ set `TrainingInput.masterType` to specify the type |
| // of machine to use for your master node. This is the only required |
| // setting. |
| // |
| // * You _may_ set `TrainingInput.workerCount` to specify the number of |
| // workers to use. If you specify one or more workers, you _must_ also |
| // set `TrainingInput.workerType` to specify the type of machine to use |
| // for your worker nodes. |
| // |
| // * You _may_ set `TrainingInput.parameterServerCount` to specify the |
| // number of parameter servers to use. If you specify one or more |
| // parameter servers, you _must_ also set |
| // `TrainingInput.parameterServerType` to specify the type of machine to |
| // use for your parameter servers. |
| // |
| // Note that all of your workers must use the same machine type, which can |
| // be different from your parameter server type and master type. Your |
| // parameter servers must likewise use the same machine type, which can be |
| // different from your worker type and master type. |
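    //
    // For example, a hypothetical CUSTOM-tier cluster specification might
    // look like this in JSON form (the machine types and replica counts are
    // illustrative only):
    //
    //     "scaleTier": "CUSTOM",
    //     "masterType": "complex_model_m",
    //     "workerType": "complex_model_m",
    //     "parameterServerType": "large_model",
    //     "workerCount": 9,
    //     "parameterServerCount": 3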
| CUSTOM = 5; |
| } |
| |
  // Required. Specifies the machine types and the number of replicas to use
  // for workers and parameter servers.
| ScaleTier scale_tier = 1; |
| |
| // Optional. Specifies the type of virtual machine to use for your training |
| // job's master worker. |
| // |
| // The following types are supported: |
| // |
| // <dl> |
| // <dt>standard</dt> |
| // <dd> |
| // A basic machine configuration suitable for training simple models with |
| // small to moderate datasets. |
| // </dd> |
| // <dt>large_model</dt> |
| // <dd> |
| // A machine with a lot of memory, specially suited for parameter servers |
| // when your model is large (having many hidden layers or layers with very |
| // large numbers of nodes). |
| // </dd> |
| // <dt>complex_model_s</dt> |
| // <dd> |
| // A machine suitable for the master and workers of the cluster when your |
| // model requires more computation than the standard machine can handle |
| // satisfactorily. |
| // </dd> |
| // <dt>complex_model_m</dt> |
| // <dd> |
| // A machine with roughly twice the number of cores and roughly double the |
| // memory of <code suppresswarning="true">complex_model_s</code>. |
| // </dd> |
| // <dt>complex_model_l</dt> |
| // <dd> |
| // A machine with roughly twice the number of cores and roughly double the |
| // memory of <code suppresswarning="true">complex_model_m</code>. |
| // </dd> |
| // <dt>standard_gpu</dt> |
| // <dd> |
| // A machine equivalent to <code suppresswarning="true">standard</code> that |
| // also includes a |
| // <a href="ml/docs/how-tos/using-gpus"> |
| // GPU that you can use in your trainer</a>. |
| // </dd> |
| // <dt>complex_model_m_gpu</dt> |
| // <dd> |
| // A machine equivalent to |
  // <code suppresswarning="true">complex_model_m</code> that also includes
| // four GPUs. |
| // </dd> |
| // </dl> |
| // |
| // You must set this value when `scaleTier` is set to `CUSTOM`. |
| string master_type = 2; |
| |
| // Optional. Specifies the type of virtual machine to use for your training |
| // job's worker nodes. |
| // |
| // The supported values are the same as those described in the entry for |
| // `masterType`. |
| // |
| // This value must be present when `scaleTier` is set to `CUSTOM` and |
| // `workerCount` is greater than zero. |
| string worker_type = 3; |
| |
| // Optional. Specifies the type of virtual machine to use for your training |
| // job's parameter server. |
| // |
| // The supported values are the same as those described in the entry for |
  // `masterType`.
  //
  // This value must be present when `scaleTier` is set to `CUSTOM` and
  // `parameterServerCount` is greater than zero.
| string parameter_server_type = 4; |
| |
  // Optional. The number of worker replicas to use for the training job. Each
  // replica in the cluster will be of the type specified in `workerType`.
  //
  // This value can only be used when `scaleTier` is set to `CUSTOM`. If you
  // set this value, you must also set `workerType`.
| int64 worker_count = 5; |
| |
  // Optional. The number of parameter server replicas to use for the training
  // job. Each replica in the cluster will be of the type specified in
  // `parameterServerType`.
  //
  // This value can only be used when `scaleTier` is set to `CUSTOM`. If you
  // set this value, you must also set `parameterServerType`.
| int64 parameter_server_count = 6; |
| |
| // Required. The Google Cloud Storage location of the packages with |
| // the training program and any additional dependencies. |
| repeated string package_uris = 7; |
| |
| // Required. The Python module name to run after installing the packages. |
| string python_module = 8; |
| |
| // Optional. Command line arguments to pass to the program. |
| repeated string args = 10; |
| |
  // Optional. The set of hyperparameters to tune.
| HyperparameterSpec hyperparameters = 12; |
| |
| // Required. The Google Compute Engine region to run the training job in. |
| string region = 14; |
| |
| // Optional. A Google Cloud Storage path in which to store training outputs |
| // and other data needed for training. This path is passed to your TensorFlow |
| // program as the 'job_dir' command-line argument. The benefit of specifying |
| // this field is that Cloud ML validates the path for use in training. |
| string job_dir = 16; |
| |
| // Optional. The Google Cloud ML runtime version to use for training. If not |
| // set, Google Cloud ML will choose the latest stable version. |
| string runtime_version = 15; |
| } |
| |
| // Represents a set of hyperparameters to optimize. |
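//
// For example, a hypothetical tuning spec in JSON form (the parameter and
// its bounds are illustrative only):
//
//     "hyperparameters": {
//       "goal": "MAXIMIZE",
//       "maxTrials": 10,
//       "maxParallelTrials": 2,
//       "params": [
//         {
//           "parameterName": "learning_rate",
//           "type": "DOUBLE",
//           "minValue": 0.0001,
//           "maxValue": 0.1,
//           "scaleType": "UNIT_LOG_SCALE"
//         }
//       ]
//     }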
| message HyperparameterSpec { |
| // The available types of optimization goals. |
| enum GoalType { |
    // The goal type will default to maximize.
| GOAL_TYPE_UNSPECIFIED = 0; |
| |
| // Maximize the goal metric. |
| MAXIMIZE = 1; |
| |
| // Minimize the goal metric. |
| MINIMIZE = 2; |
| } |
| |
| // Required. The type of goal to use for tuning. Available types are |
| // `MAXIMIZE` and `MINIMIZE`. |
| // |
| // Defaults to `MAXIMIZE`. |
| GoalType goal = 1; |
| |
| // Required. The set of parameters to tune. |
| repeated ParameterSpec params = 2; |
| |
| // Optional. How many training trials should be attempted to optimize |
| // the specified hyperparameters. |
| // |
| // Defaults to one. |
| int32 max_trials = 3; |
| |
  // Optional. The number of training trials to run concurrently.
  // You can reduce the time it takes to perform hyperparameter tuning by
  // running trials in parallel. However, each trial only benefits from the
  // information gained in completed trials. That means that a trial does not
  // get access to the results of trials running at the same time, which could
  // reduce the quality of the overall optimization.
| // |
| // Each trial will use the same scale tier and machine types. |
| // |
| // Defaults to one. |
| int32 max_parallel_trials = 4; |
| |
  // Optional. The TensorFlow summary tag name to use for optimizing trials. For
  // current versions of TensorFlow, this tag name should exactly match what is
  // shown in TensorBoard, including all scopes. For versions of TensorFlow
  // prior to 0.12, this should be only the tag passed to tf.Summary.
| // By default, "training/hptuning/metric" will be used. |
| string hyperparameter_metric_tag = 5; |
| } |
| |
| // Represents a single hyperparameter to optimize. |
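//
// For example, a hypothetical categorical parameter in JSON form (the name
// and categories are illustrative only):
//
//     {
//       "parameterName": "optimizer",
//       "type": "CATEGORICAL",
//       "categoricalValues": ["adam", "sgd", "ftrl"]
//     }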
| message ParameterSpec { |
| // The type of the parameter. |
| enum ParameterType { |
| // You must specify a valid type. Using this unspecified type will result in |
| // an error. |
| PARAMETER_TYPE_UNSPECIFIED = 0; |
| |
| // Type for real-valued parameters. |
| DOUBLE = 1; |
| |
| // Type for integral parameters. |
| INTEGER = 2; |
| |
| // The parameter is categorical, with a value chosen from the categories |
| // field. |
| CATEGORICAL = 3; |
| |
    // The parameter is real valued, with a fixed set of feasible points. If
    // `type` is `DISCRETE`, `discrete_values` must be provided, and
    // {`min_value`, `max_value`} will be ignored.
| DISCRETE = 4; |
| } |
| |
| // The type of scaling that should be applied to this parameter. |
| enum ScaleType { |
| // By default, no scaling is applied. |
| NONE = 0; |
| |
| // Scales the feasible space to (0, 1) linearly. |
| UNIT_LINEAR_SCALE = 1; |
| |
| // Scales the feasible space logarithmically to (0, 1). The entire feasible |
| // space must be strictly positive. |
| UNIT_LOG_SCALE = 2; |
| |
| // Scales the feasible space "reverse" logarithmically to (0, 1). The result |
| // is that values close to the top of the feasible space are spread out more |
| // than points near the bottom. The entire feasible space must be strictly |
| // positive. |
| UNIT_REVERSE_LOG_SCALE = 3; |
| } |
| |
  // Required. The parameter name must be unique amongst all ParameterSpecs in
| // a HyperparameterSpec message. E.g., "learning_rate". |
| string parameter_name = 1; |
| |
| // Required. The type of the parameter. |
| ParameterType type = 4; |
| |
  // Required if type is `DOUBLE` or `INTEGER`. This field
  // should be unset if type is `CATEGORICAL`. This value should be an integer
  // if type is `INTEGER`.
| double min_value = 2; |
| |
  // Required if type is `DOUBLE` or `INTEGER`. This field
  // should be unset if type is `CATEGORICAL`. This value should be an integer
  // if type is `INTEGER`.
| double max_value = 3; |
| |
| // Required if type is `CATEGORICAL`. The list of possible categories. |
| repeated string categorical_values = 5; |
| |
| // Required if type is `DISCRETE`. |
| // A list of feasible points. |
| // The list should be in strictly increasing order. For instance, this |
| // parameter might have possible settings of 1.5, 2.5, and 4.0. This list |
| // should not contain more than 1,000 values. |
| repeated double discrete_values = 6; |
| |
| // Optional. How the parameter should be scaled to the hypercube. |
| // Leave unset for categorical parameters. |
| // Some kind of scaling is strongly recommended for real or integral |
| // parameters (e.g., `UNIT_LINEAR_SCALE`). |
| ScaleType scale_type = 7; |
| } |
| |
| // Represents the result of a single hyperparameter tuning trial from a |
| // training job. The TrainingOutput object that is returned on successful |
| // completion of a training job with hyperparameter tuning includes a list |
| // of HyperparameterOutput objects, one for each successful trial. |
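//
// A hypothetical trial result in JSON form (all values are illustrative
// only):
//
//     {
//       "trialId": "3",
//       "hyperparameters": { "learning_rate": "0.01" },
//       "finalMetric": { "trainingStep": "1000", "objectiveValue": 0.92 }
//     }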
| message HyperparameterOutput { |
| // An observed value of a metric. |
| message HyperparameterMetric { |
| // The global training step for this metric. |
| int64 training_step = 1; |
| |
| // The objective value at this training step. |
| double objective_value = 2; |
| } |
| |
| // The trial id for these results. |
| string trial_id = 1; |
| |
| // The hyperparameters given to this trial. |
| map<string, string> hyperparameters = 2; |
| |
| // The final objective metric seen for this trial. |
| HyperparameterMetric final_metric = 3; |
| |
  // All recorded objective metrics for this trial.
| repeated HyperparameterMetric all_metrics = 4; |
| } |
| |
| // Represents results of a training job. Output only. |
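//
// A hypothetical output for a completed hyperparameter tuning job in JSON
// form (all values are illustrative only):
//
//     "trainingOutput": {
//       "isHyperparameterTuningJob": true,
//       "completedTrialCount": "10",
//       "consumedMlUnits": 1.5,
//       "trials": [ ... ]
//     }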
| message TrainingOutput { |
| // The number of hyperparameter tuning trials that completed successfully. |
| // Only set for hyperparameter tuning jobs. |
| int64 completed_trial_count = 1; |
| |
| // Results for individual Hyperparameter trials. |
| // Only set for hyperparameter tuning jobs. |
| repeated HyperparameterOutput trials = 2; |
| |
  // The number of ML units consumed by the job.
| double consumed_ml_units = 3; |
| |
| // Whether this job is a hyperparameter tuning job. |
| bool is_hyperparameter_tuning_job = 4; |
| } |
| |
| // Represents input parameters for a prediction job. |
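//
// For example, a hypothetical batch prediction job might carry a
// `PredictionInput` like this in JSON form (the model, bucket, and path
// names are illustrative only):
//
//     "predictionInput": {
//       "modelName": "projects/my_project/models/my_model",
//       "dataFormat": "TEXT",
//       "inputPaths": ["gs://my_bucket/instances-*"],
//       "outputPath": "gs://my_bucket/output/",
//       "region": "us-central1"
//     }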
| message PredictionInput { |
| // The format used to separate data instances in the source files. |
| enum DataFormat { |
| // Unspecified format. |
| DATA_FORMAT_UNSPECIFIED = 0; |
| |
| // The source file is a text file with instances separated by the |
| // new-line character. |
| TEXT = 1; |
| |
| // The source file is a TFRecord file. |
| TF_RECORD = 2; |
| |
| // The source file is a GZIP-compressed TFRecord file. |
| TF_RECORD_GZIP = 3; |
| } |
| |
| // Required. The model or the version to use for prediction. |
| oneof model_version { |
| // Use this field if you want to use the default version for the specified |
| // model. The string must use the following format: |
| // |
| // `"projects/<var>[YOUR_PROJECT]</var>/models/<var>[YOUR_MODEL]</var>"` |
| string model_name = 1; |
| |
    // Use this field if you want to specify a version of the model to use. The
    // string is formatted the same way as `model_name`, with the addition
    // of the version information:
    //
    // `"projects/<var>[YOUR_PROJECT]</var>/models/<var>[YOUR_MODEL]</var>/versions/<var>[YOUR_VERSION]</var>"`
| string version_name = 2; |
| |
| // Use this field if you want to specify a Google Cloud Storage path for |
| // the model to use. |
| string uri = 9; |
| } |
| |
| // Required. The format of the input data files. |
| DataFormat data_format = 3; |
| |
| // Required. The Google Cloud Storage location of the input data files. |
| // May contain wildcards. |
| repeated string input_paths = 4; |
| |
| // Required. The output Google Cloud Storage location. |
| string output_path = 5; |
| |
| // Optional. The maximum number of workers to be used for parallel processing. |
| // Defaults to 10 if not specified. |
| int64 max_worker_count = 6; |
| |
| // Required. The Google Compute Engine region to run the prediction job in. |
| string region = 7; |
| |
| // Optional. The Google Cloud ML runtime version to use for this batch |
| // prediction. If not set, Google Cloud ML will pick the runtime version used |
| // during the CreateVersion request for this model version, or choose the |
  // latest stable version when model version information is not available,
  // such as when the model is specified by `uri`.
| string runtime_version = 8; |
| } |
| |
| // Represents results of a prediction job. |
| message PredictionOutput { |
  // The output Google Cloud Storage location provided at job creation time.
| string output_path = 1; |
| |
| // The number of generated predictions. |
| int64 prediction_count = 2; |
| |
| // The number of data instances which resulted in errors. |
| int64 error_count = 3; |
| |
| // Node hours used by the batch prediction job. |
| double node_hours = 4; |
| } |
| |
| // Represents a training or prediction job. |
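//
// For example, a hypothetical newly created training job in JSON form (the
// job id and timestamp are illustrative only):
//
//     {
//       "jobId": "my_training_job_001",
//       "trainingInput": { ... },
//       "createTime": "2017-01-01T00:00:00Z",
//       "state": "QUEUED"
//     }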
| message Job { |
| // Describes the job state. |
| enum State { |
| // The job state is unspecified. |
| STATE_UNSPECIFIED = 0; |
| |
    // The job has just been created and processing has not yet begun.
| QUEUED = 1; |
| |
| // The service is preparing to run the job. |
| PREPARING = 2; |
| |
| // The job is in progress. |
| RUNNING = 3; |
| |
| // The job completed successfully. |
| SUCCEEDED = 4; |
| |
| // The job failed. |
| // `error_message` should contain the details of the failure. |
| FAILED = 5; |
| |
| // The job is being cancelled. |
| // `error_message` should describe the reason for the cancellation. |
| CANCELLING = 6; |
| |
| // The job has been cancelled. |
| // `error_message` should describe the reason for the cancellation. |
| CANCELLED = 7; |
| } |
| |
| // Required. The user-specified id of the job. |
| string job_id = 1; |
| |
| // Required. Parameters to create a job. |
| oneof input { |
| // Input parameters to create a training job. |
| TrainingInput training_input = 2; |
| |
| // Input parameters to create a prediction job. |
| PredictionInput prediction_input = 3; |
| } |
| |
| // Output only. When the job was created. |
| google.protobuf.Timestamp create_time = 4; |
| |
| // Output only. When the job processing was started. |
| google.protobuf.Timestamp start_time = 5; |
| |
| // Output only. When the job processing was completed. |
| google.protobuf.Timestamp end_time = 6; |
| |
| // Output only. The detailed state of a job. |
| State state = 7; |
| |
| // Output only. The details of a failure or a cancellation. |
| string error_message = 8; |
| |
| // Output only. The current result of the job. |
| oneof output { |
| // The current training job result. |
| TrainingOutput training_output = 9; |
| |
| // The current prediction job result. |
| PredictionOutput prediction_output = 10; |
| } |
| } |
| |
| // Request message for the CreateJob method. |
| message CreateJobRequest { |
| // Required. The project name. |
| // |
| // Authorization: requires `Editor` role on the specified project. |
| string parent = 1; |
| |
| // Required. The job to create. |
| Job job = 2; |
| } |
| |
| // Request message for the ListJobs method. |
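//
// A hypothetical paging sequence over REST (the project name and token
// value are illustrative only):
//
//     GET https://ml.googleapis.com/v1beta1/projects/my_project/jobs?pageSize=20
//     GET https://ml.googleapis.com/v1beta1/projects/my_project/jobs?pageSize=20&pageToken=TOKEN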
| message ListJobsRequest { |
| // Required. The name of the project for which to list jobs. |
| // |
| // Authorization: requires `Viewer` role on the specified project. |
| string parent = 1; |
| |
| // Optional. Specifies the subset of jobs to retrieve. |
| string filter = 2; |
| |
| // Optional. A page token to request the next page of results. |
| // |
| // You get the token from the `next_page_token` field of the response from |
| // the previous call. |
| string page_token = 4; |
| |
| // Optional. The number of jobs to retrieve per "page" of results. If there |
| // are more remaining results than this number, the response message will |
| // contain a valid value in the `next_page_token` field. |
| // |
| // The default value is 20, and the maximum page size is 100. |
| int32 page_size = 5; |
| } |
| |
| // Response message for the ListJobs method. |
| message ListJobsResponse { |
| // The list of jobs. |
| repeated Job jobs = 1; |
| |
| // Optional. Pass this token as the `page_token` field of the request for a |
| // subsequent call. |
| string next_page_token = 2; |
| } |
| |
| // Request message for the GetJob method. |
| message GetJobRequest { |
| // Required. The name of the job to get the description of. |
| // |
| // Authorization: requires `Viewer` role on the parent project. |
| string name = 1; |
| } |
| |
| // Request message for the CancelJob method. |
| message CancelJobRequest { |
| // Required. The name of the job to cancel. |
| // |
| // Authorization: requires `Editor` role on the parent project. |
| string name = 1; |
| } |