// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.assistant.embedded.v1alpha1;

import "google/api/annotations.proto";
import "google/rpc/status.proto";

option go_package = "google.golang.org/genproto/googleapis/assistant/embedded/v1alpha1;embedded";
option java_multiple_files = true;
option java_outer_classname = "AssistantProto";
option java_package = "com.google.assistant.embedded.v1alpha1";

// Service that implements the Google Assistant API.
service EmbeddedAssistant {
  // Initiates or continues a conversation with the embedded assistant service.
  // Each call performs one round-trip, sending an audio request to the service
  // and receiving the audio response. Uses bidirectional streaming to receive
  // results, such as the `END_OF_UTTERANCE` event, while sending audio.
  //
  // A conversation is one or more `Converse` calls, each consisting of several
  // streamed requests and responses.
  // For example, the user says *Add to my shopping list* and the assistant
  // responds *What do you want to add?*. The sequence of streamed requests and
  // responses in the first call could be:
  //
  // * ConverseRequest.config
  // * ConverseRequest.audio_in
  // * ConverseRequest.audio_in
  // * ConverseRequest.audio_in
  // * ConverseRequest.audio_in
  // * ConverseResponse.event_type.END_OF_UTTERANCE
  // * ConverseResponse.result.microphone_mode.DIALOG_FOLLOW_ON
  // * ConverseResponse.audio_out
  // * ConverseResponse.audio_out
  // * ConverseResponse.audio_out
  //
  // The user then says *bagels* and the assistant responds
  // *OK, I've added bagels to your shopping list*. This is sent as another
  // call to the `Converse` method, again with streamed requests and
  // responses, such as:
  //
  // * ConverseRequest.config
  // * ConverseRequest.audio_in
  // * ConverseRequest.audio_in
  // * ConverseRequest.audio_in
  // * ConverseResponse.event_type.END_OF_UTTERANCE
  // * ConverseResponse.result.microphone_mode.CLOSE_MICROPHONE
  // * ConverseResponse.audio_out
  // * ConverseResponse.audio_out
  // * ConverseResponse.audio_out
  // * ConverseResponse.audio_out
  //
  // Although the precise order of responses is not guaranteed, sequential
  // ConverseResponse.audio_out messages will always contain sequential
  // portions of audio.
  rpc Converse(stream ConverseRequest) returns (stream ConverseResponse);
}
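
// Since `Converse` is a bidirectional stream, a client passes a request
// iterator to the generated stub and consumes a response iterator. The
// following is a minimal Python sketch, not a definitive client; it assumes
// the generated modules follow standard protoc naming
// (`embedded_assistant_pb2`, `embedded_assistant_pb2_grpc`) and that `creds`
// is a credentials object carrying the OAuth2 authorization the service
// requires:
//
//     import grpc
//     from google.assistant.embedded.v1alpha1 import (
//         embedded_assistant_pb2 as pb2,
//         embedded_assistant_pb2_grpc as pb2_grpc,
//     )
//
//     channel = grpc.secure_channel('embeddedassistant.googleapis.com:443',
//                                   creds)  # creds: assumed credentials
//     stub = pb2_grpc.EmbeddedAssistantStub(channel)
//     # requests() yields one config message, then audio_in chunks; see the
//     # sketch after ConverseRequest below.
//     for response in stub.Converse(requests()):
//         handle(response)  # hypothetical; see the ConverseResponse sketch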

// Specifies how to process the `ConverseRequest` messages.
message ConverseConfig {
  // *Required* Specifies how to process the subsequent incoming audio.
  AudioInConfig audio_in_config = 1;

  // *Required* Specifies how to format the audio that will be returned.
  AudioOutConfig audio_out_config = 2;

  // *Required* Represents the current dialog state.
  ConverseState converse_state = 3;
}

// Specifies how to process the `audio_in` data that will be provided in
// subsequent requests. For recommended settings, see the Google Assistant SDK
// [best practices](https://developers.google.com/assistant/best-practices).
message AudioInConfig {
  // Audio encoding of the data sent in the audio message.
  // Audio must be one-channel (mono). The only language supported is "en-US".
  enum Encoding {
    // Not specified. The request will fail with
    // [google.rpc.Code.INVALID_ARGUMENT][].
    ENCODING_UNSPECIFIED = 0;

    // Uncompressed 16-bit signed little-endian samples (Linear PCM).
    // This encoding includes no header, only the raw audio bytes.
    LINEAR16 = 1;

    // [`FLAC`](https://xiph.org/flac/documentation.html) (Free Lossless Audio
    // Codec) is the recommended encoding because it is lossless (so
    // recognition is not compromised) and requires only about half the
    // bandwidth of `LINEAR16`. This encoding includes the `FLAC` stream
    // header followed by audio data. It supports 16-bit and 24-bit samples;
    // however, not all fields in `STREAMINFO` are supported.
    FLAC = 2;
  }

  // *Required* Encoding of audio data sent in all `audio_in` messages.
  Encoding encoding = 1;

  // *Required* Sample rate (in Hertz) of the audio data sent in all `audio_in`
  // messages. Valid values are from 16000 to 24000; 16000 is optimal.
  // For best results, set the sampling rate of the audio source to 16000 Hz.
  // If that's not possible, use the native sample rate of the audio source
  // (instead of re-sampling).
  int32 sample_rate_hertz = 2;
}

// Specifies the desired format for the server to use when it returns
// `audio_out` messages.
message AudioOutConfig {
  // Audio encoding of the data returned in the audio message. All encodings
  // are raw audio bytes with no header, except as indicated below.
  enum Encoding {
    // Not specified. The request will fail with
    // [google.rpc.Code.INVALID_ARGUMENT][].
    ENCODING_UNSPECIFIED = 0;

    // Uncompressed 16-bit signed little-endian samples (Linear PCM).
    LINEAR16 = 1;

    // MP3 audio encoding. The sample rate is encoded in the payload.
    MP3 = 2;

    // Opus-encoded audio wrapped in an Ogg container. The result is a file
    // that can be played natively on Android and in some browsers (such as
    // Chrome). The quality of the encoding is considerably higher than MP3
    // at the same bitrate. The sample rate is encoded in the payload.
    OPUS_IN_OGG = 3;
  }

  // *Required* The encoding of audio data to be returned in all `audio_out`
  // messages.
  Encoding encoding = 1;

  // *Required* The sample rate in Hertz of the audio data returned in
  // `audio_out` messages. Valid values are 16000-24000.
  int32 sample_rate_hertz = 2;

  // *Required* Current volume setting of the device's audio output.
  // Valid values are 1 to 100 (corresponding to 1% to 100%).
  int32 volume_percentage = 3;
}

// Provides information about the current dialog state.
message ConverseState {
  // *Required* The `conversation_state` value returned in the prior
  // `ConverseResponse`. Omit (do not set the field) if there was no prior
  // `ConverseResponse`. If there was a prior `ConverseResponse`, do not omit
  // this field; doing so will end that conversation (and this new request will
  // start a new conversation).
  bytes conversation_state = 1;
}
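
// Putting the three required pieces together, a client might build the
// complete `ConverseConfig` like this (a sketch reusing the `pb2` alias from
// the example above; `saved_state` holds the `conversation_state` bytes kept
// from the previous response, and is empty for a new conversation):
//
//     config = pb2.ConverseConfig(
//         audio_in_config=pb2.AudioInConfig(
//             encoding=pb2.AudioInConfig.LINEAR16,
//             sample_rate_hertz=16000),
//         audio_out_config=pb2.AudioOutConfig(
//             encoding=pb2.AudioOutConfig.LINEAR16,
//             sample_rate_hertz=16000,
//             volume_percentage=50),  # example value; use the device's volume
//         converse_state=pb2.ConverseState(conversation_state=saved_state),
//     )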

// The audio containing the assistant's response to the query. Sequential
// chunks of audio data are received in sequential `ConverseResponse` messages.
message AudioOut {
  // *Output-only* The audio data containing the assistant's response to the
  // query. Sequential chunks of audio data are received in sequential
  // `ConverseResponse` messages.
  bytes audio_data = 1;
}

// The semantic result for the user's spoken query.
message ConverseResult {
  // Possible states of the microphone after a `Converse` RPC completes.
  enum MicrophoneMode {
    // No mode specified.
    MICROPHONE_MODE_UNSPECIFIED = 0;

    // The service is not expecting a follow-on question from the user.
    // The microphone should remain off until the user re-activates it.
    CLOSE_MICROPHONE = 1;

    // The service is expecting a follow-on question from the user. The
    // microphone should be re-opened when the `AudioOut` playback completes
    // (by starting a new `Converse` RPC call to send the new audio).
    DIALOG_FOLLOW_ON = 2;
  }

  // *Output-only* The recognized transcript of what the user said.
  string spoken_request_text = 1;

  // *Output-only* The text of the assistant's spoken response. This is only
  // returned for an IFTTT action.
  string spoken_response_text = 2;

  // *Output-only* State information for the subsequent `ConverseRequest`. This
  // value should be saved in the client and returned in the
  // `conversation_state` with the next `ConverseRequest`. (The client does not
  // need to interpret or otherwise use this value.) There is no need to save
  // this information across device restarts.
  bytes conversation_state = 3;

  // *Output-only* Specifies the mode of the microphone after this `Converse`
  // RPC is processed.
  MicrophoneMode microphone_mode = 4;

  // *Output-only* Updated volume level. The value will be 0 or omitted
  // (indicating no change) unless a voice command such as "Increase the volume"
  // or "Set volume level 4" was recognized, in which case the value will be
  // between 1 and 100 (corresponding to the new volume level of 1% to 100%).
  // Typically, a client should use this volume level when playing the
  // `audio_out` data, retain this value as the current volume level, and
  // supply it in the `AudioOutConfig` of the next `ConverseRequest`. (Some
  // clients may also implement other ways to allow the current volume level to
  // be changed, for example, by providing a knob that the user can turn.)
  int32 volume_percentage = 5;
}
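
// `microphone_mode` drives the client's turn-taking. A sketch of the outer
// loop, where `converse_once()` is a hypothetical wrapper around one
// `Converse` round-trip (as sketched after the service definition) that
// returns the final `ConverseResult`:
//
//     result = converse_once()
//     while result.microphone_mode == pb2.ConverseResult.DIALOG_FOLLOW_ON:
//         # Follow-on expected: re-open the microphone after playback and
//         # start the next round-trip immediately.
//         result = converse_once()
//     # Otherwise (CLOSE_MICROPHONE), wait for the user to re-activate.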

// The top-level message sent by the client. Clients must send at least two,
// and typically numerous, `ConverseRequest` messages. The first message must
// contain a `config` message and must not contain `audio_in` data. All
// subsequent messages must contain `audio_in` data and must not contain a
// `config` message.
message ConverseRequest {
  // Exactly one of these fields must be specified in each `ConverseRequest`.
  oneof converse_request {
    // The `config` message provides information to the recognizer that
    // specifies how to process the request.
    // The first `ConverseRequest` message must contain a `config` message.
    ConverseConfig config = 1;

    // The audio data to be recognized. Sequential chunks of audio data are
    // sent in sequential `ConverseRequest` messages. The first
    // `ConverseRequest` message must not contain `audio_in` data and all
    // subsequent `ConverseRequest` messages must contain `audio_in` data. The
    // audio bytes must be encoded as specified in `AudioInConfig`.
    // Audio must be sent in approximately real time (16000 samples per
    // second). An error will be returned if audio is sent significantly
    // faster or slower.
    bytes audio_in = 2;
  }
}
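
// In generator form, the required ordering (one `config` message first, then
// only `audio_in` chunks, paced at real time) looks like the sketch below;
// `record_audio_chunks()` is a hypothetical microphone source yielding raw
// LINEAR16 bytes at the configured sample rate:
//
//     def requests():
//         yield pb2.ConverseRequest(config=config)  # config built as above
//         for chunk in record_audio_chunks():       # hypothetical helper
//             yield pb2.ConverseRequest(audio_in=chunk)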

// The top-level message received by the client. A series of one or more
// `ConverseResponse` messages is streamed back to the client.
message ConverseResponse {
  // Indicates the type of event.
  enum EventType {
    // No event specified.
    EVENT_TYPE_UNSPECIFIED = 0;

    // This event indicates that the server has detected the end of the user's
    // speech utterance and expects no additional speech. Therefore, the server
    // will not process additional audio (although it may subsequently return
    // additional results). The client should stop sending additional audio
    // data, half-close the gRPC connection, and wait for any additional
    // results until the server closes the gRPC connection.
    END_OF_UTTERANCE = 1;
  }

  // Exactly one of these fields will be populated in each `ConverseResponse`.
  oneof converse_response {
    // *Output-only* If set, returns a [google.rpc.Status][google.rpc.Status]
    // message that specifies the error for the operation.
    // If an error occurs during processing, this message will be set and there
    // will be no further messages sent.
    google.rpc.Status error = 1;

    // *Output-only* Indicates the type of event.
    EventType event_type = 2;

    // *Output-only* The audio containing the assistant's response to the
    // query.
    AudioOut audio_out = 3;

    // *Output-only* The semantic result for the user's spoken query.
    ConverseResult result = 5;
  }
}
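
// A client's receive loop then dispatches on whichever field is populated. In
// this sketch (same assumptions as above), `play_audio` and `stop_recording`
// are hypothetical helpers; stopping the recorder ends the `requests()`
// generator, which half-closes the stream:
//
//     saved_state = b''
//     for resp in stub.Converse(requests()):
//         if resp.HasField('error'):
//             raise RuntimeError(resp.error.message)
//         if resp.event_type == pb2.ConverseResponse.END_OF_UTTERANCE:
//             stop_recording()  # stop sending audio; half-close follows
//         if resp.HasField('audio_out'):
//             play_audio(resp.audio_out.audio_data)  # chunks arrive in order
//         if resp.HasField('result'):
//             saved_state = resp.result.conversation_state
//             if resp.result.volume_percentage != 0:
//                 current_volume = resp.result.volume_percentage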