// blob: dd28dc7d7e6f941692b584ea368bb38ae95c8067 [file] [log] [blame]
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";

package google.privacy.dlp.v2beta1;

import "google/api/annotations.proto";
import "google/longrunning/operations.proto";
import "google/privacy/dlp/v2beta1/storage.proto";
import "google/protobuf/timestamp.proto";

// Code-generation options for the Go and Java targets.
option go_package = "google.golang.org/genproto/googleapis/privacy/dlp/v2beta1;dlp";
option java_multiple_files = true;
option java_outer_classname = "DlpProto";
option java_package = "com.google.privacy.dlp.v2beta1";
// The DLP API lets clients detect Personally Identifiable Information (PII)
// and other privacy-sensitive data in user-supplied, unstructured data
// streams such as text blocks or images.
// The service also offers methods for redacting sensitive data and for
// scheduling data scans on Google Cloud Platform based data sets.
service DlpService {
  // Finds potentially sensitive info in a list of strings.
  // Input size, processing time, and output size are all limited.
  rpc InspectContent(InspectContentRequest) returns (InspectContentResponse) {
    option (google.api.http) = {
      post: "/v2beta1/content:inspect"
      body: "*"
    };
  }

  // Redacts potentially sensitive info from a list of strings.
  // Input size, processing time, and output size are all limited.
  rpc RedactContent(RedactContentRequest) returns (RedactContentResponse) {
    option (google.api.http) = {
      post: "/v2beta1/content:redact"
      body: "*"
    };
  }

  // Schedules a job that scans content in a Google Cloud Platform data
  // repository.
  rpc CreateInspectOperation(CreateInspectOperationRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2beta1/inspect/operations"
      body: "*"
    };
  }

  // Returns the list of results for the given inspect operation result set
  // id.
  rpc ListInspectFindings(ListInspectFindingsRequest)
      returns (ListInspectFindingsResponse) {
    option (google.api.http) = {
      get: "/v2beta1/{name=inspect/results/*}/findings"
    };
  }

  // Returns the sensitive information types for the given category.
  rpc ListInfoTypes(ListInfoTypesRequest) returns (ListInfoTypesResponse) {
    option (google.api.http) = {
      get: "/v2beta1/rootCategories/{category=*}/infoTypes"
    };
  }

  // Returns the list of root categories of sensitive information.
  rpc ListRootCategories(ListRootCategoriesRequest)
      returns (ListRootCategoriesResponse) {
    option (google.api.http) = {
      get: "/v2beta1/rootCategories"
    };
  }
}
// Settings that describe how the scanning process should run.
// When used with redactContent, only info_types and min_likelihood are
// currently used.
message InspectConfig {
  // Restricts which info_types to look for. The values must correspond to
  // InfoType values returned by ListInfoTypes or found in documentation.
  // An empty info_types list runs all enabled detectors.
  repeated InfoType info_types = 1;

  // Only findings at or above this likelihood threshold are returned.
  Likelihood min_likelihood = 2;

  // Upper bound on the number of findings per content item.
  int32 max_findings = 3;

  // When true, a contextual quote from the data that triggered a finding is
  // included in the response; see Finding.quote.
  bool include_quote = 4;

  // When true, type information of the findings is excluded.
  bool exclude_types = 6;

  // NOTE(review): field number 5 is not assigned in this message; confirm
  // whether it belongs to a removed or non-public field before reusing it.
}
// Wrapper around a single piece of content to inspect.
message ContentItem {
  // Content type, as defined in the Content-Type HTTP header.
  // Supported types are: all "text" types, octet streams, PNG images,
  // JPEG images.
  string type = 1;

  // The item's payload, supplied either as a byte array or as a UTF-8
  // string.
  oneof data_item {
    // Content data to inspect or redact.
    bytes data = 2;

    // String data to inspect or redact.
    string value = 3;
  }
}
// The complete set of findings for one scanned item.
message InspectResult {
  // Findings for the item.
  repeated Finding findings = 1;

  // If true, this item might have more findings than were returned, and the
  // returned findings are an arbitrary subset of all findings.
  // The list might be truncated because the input items were too large, or
  // because the server reached the maximum amount of resources allowed for a
  // single API call. For best results, divide the input into smaller batches.
  bool findings_truncated = 2;
}
// Describes a single finding within a string or image.
message Finding {
  // The specific string that may be potentially sensitive info.
  string quote = 1;

  // The specific type of info the string might be.
  InfoType info_type = 2;

  // Estimate of how likely it is that the info_type is correct.
  Likelihood likelihood = 3;

  // Where the info was found.
  Location location = 4;

  // Timestamp when the finding was detected.
  google.protobuf.Timestamp create_time = 6;

  // NOTE(review): field number 5 is not assigned in this message; confirm
  // whether it belongs to a removed or non-public field before reusing it.
}
// Identifies where a finding sits within its source item.
message Location {
  // Zero-based byte offsets within a content item.
  Range byte_range = 1;

  // Character offsets within a content item, included when the content type
  // is a text. The default charset is assumed to be UTF-8.
  Range codepoint_range = 2;

  // Location within an image's pixels.
  repeated ImageLocation image_boxes = 3;

  // Key of the finding.
  RecordKey record_key = 4;

  // Field id of the field containing the finding.
  FieldId field_id = 5;
}
// Generic half-open interval [start, end).
message Range {
  // Index of the first element of the range (inclusive). The unit depends on
  // the field that holds this Range: bytes for Location.byte_range,
  // characters for Location.codepoint_range.
  int64 start = 1;

  // Index one past the last element of the range (exclusive), in the same
  // unit as `start`.
  int64 end = 2;
}
// Bounding box that encloses detected text within an image.
message ImageLocation {
  // Top coordinate of the bounding box. (0,0) is upper left.
  int32 top = 1;

  // Left coordinate of the bounding box. (0,0) is upper left.
  int32 left = 2;

  // Width of the bounding box, in pixels.
  int32 width = 3;

  // Height of the bounding box, in pixels.
  int32 height = 4;
}
// Request to search for potentially sensitive info in a list of items
// and replace it with a default or provided content.
message RedactContentRequest {
  // Pairs an info type with the content that replaces findings of that type.
  message ReplaceConfig {
    // Type of information to replace. Only one ReplaceConfig per info_type
    // should be provided. If ReplaceConfig does not have an info_type, it is
    // matched against all info_types that are found but not specified in
    // another ReplaceConfig.
    InfoType info_type = 1;

    // Content replacing sensitive information of given type. Max 256 chars.
    string replace_with = 2;
  }

  // Configuration for the inspector.
  InspectConfig inspect_config = 1;

  // The list of items to inspect. Up to 100 are allowed per request.
  repeated ContentItem items = 2;

  // The strings to replace findings with. Must specify at least one.
  repeated ReplaceConfig replace_configs = 3;
}
// Outcome of deidentifying a list of items.
message RedactContentResponse {
  // The items after redaction.
  repeated ContentItem items = 1;
}
// Request to search for potentially sensitive info in a list of items.
message InspectContentRequest {
  // Configuration for the inspector.
  InspectConfig inspect_config = 1;

  // The list of items to inspect. Items in a single request are
  // considered "related" unless inspect_config.independent_inputs is true.
  // Up to 100 are allowed per request.
  //
  // NOTE(review): InspectConfig as defined in this file has no
  // `independent_inputs` field; confirm whether the reference above is stale.
  repeated ContentItem items = 2;
}
// Outcome of inspecting a list of items.
message InspectContentResponse {
  // One result per content_item in the request, in the same order as the
  // request.
  repeated InspectResult results = 1;
}
// Request for scheduling a scan of a data subset from a Google Cloud Platform
// data repository.
message CreateInspectOperationRequest {
  // Configuration for the inspector.
  InspectConfig inspect_config = 1;

  // Specification of the data set to process.
  StorageConfig storage_config = 2;

  // Optional location to store findings. The bucket must already exist and
  // the Google APIs service account for DLP must have write permission to
  // write to the given bucket.
  // Results will be split over multiple csv files with each file name matching
  // the pattern "[operation_id] + [count].csv".
  // The operation_id will match the identifier for the Operation,
  // and the [count] is a counter used for tracking the number of files written.
  // The CSV file(s) contain the following columns regardless of storage type
  // scanned: id, info_type, likelihood, byte size of finding, quote, time_stamp
  // For cloud storage the next two columns are: file_path, start_offset
  // For datastore the next columns are: project_id, namespace_id, path,
  // column_name, offset.
  OutputStorageConfig output_config = 3;
}
// Cloud repository in which output is stored.
message OutputStorageConfig {
  // Kind of output destination. Only a storage path is defined in this file.
  oneof type {
    // The path to a Google Cloud Storage location to store output.
    CloudStoragePath storage_path = 2;

    // NOTE(review): field number 1 is not assigned here; confirm whether it
    // is held for another destination type before reusing it.
  }
}
// Statistics about one specific InfoType.
message InfoTypeStatistics {
  // The type of finding this stat is for.
  InfoType info_type = 1;

  // How many findings were recorded for this info type.
  int64 count = 2;
}
// Metadata returned within GetOperation for an inspect request.
message InspectOperationMetadata {
  // Total size in bytes that were processed.
  int64 processed_bytes = 1;

  // Estimate of the number of bytes to process.
  int64 total_estimated_bytes = 4;

  // Finding statistics, one entry per InfoType (see InfoTypeStatistics).
  repeated InfoTypeStatistics info_type_stats = 2;

  // The time at which this request was started.
  google.protobuf.Timestamp create_time = 3;

  // The inspect config used to create the Operation.
  InspectConfig request_inspect_config = 5;

  // The storage config used to create the Operation.
  StorageConfig request_storage_config = 6;

  // Optional location to store findings.
  OutputStorageConfig request_output_config = 7;
}
// The operational data.
message InspectOperationResult {
  // Server-assigned name, unique only within the service that originally
  // returns it. With the default HTTP mapping, `name` should have the format
  // `inspect/results/{id}`.
  string name = 1;
}
// Request for the list of results in a given inspect operation.
message ListInspectFindingsRequest {
  // Identifier of the results set returned as metadata of
  // the longrunning operation created by a call to CreateInspectOperation.
  // Should be in the format of `inspect/results/{id}`.
  string name = 1;

  // Maximum number of results to return.
  // If 0, the implementation will select a reasonable value.
  int32 page_size = 2;

  // The value returned by the last `ListInspectFindingsResponse`; indicates
  // that this is a continuation of a prior `ListInspectFindings` call, and that
  // the system should return the next page of data.
  string page_token = 3;
}
// Reply to a ListInspectFindings request.
message ListInspectFindingsResponse {
  // The results.
  InspectResult result = 1;

  // When non-empty, more results matching the request may exist; pass this
  // value in a new `ListInspectFindingsRequest` to fetch them.
  string next_page_token = 2;
}
// Describes an info type.
message InfoTypeDescription {
  // Internal name of the info type.
  string name = 1;

  // Human-readable form of the info type name.
  string display_name = 2;

  // Categories this info type belongs to.
  repeated CategoryDescription categories = 3;
}
// Request for the info types that belong to a given category, or for all
// supported info types if no category is specified.
message ListInfoTypesRequest {
  // Category name as returned by ListRootCategories.
  string category = 1;

  // Optional BCP-47 language code for localized info type friendly names.
  // If omitted, or if localized strings are not available, en-US strings
  // will be returned.
  string language_code = 2;
}
// Reply to a ListInfoTypes request.
message ListInfoTypesResponse {
  // The sensitive info types that belong to a category.
  repeated InfoTypeDescription info_types = 1;
}
// Describes an info type category.
message CategoryDescription {
  // Internal name of the category.
  string name = 1;

  // Human-readable form of the category name.
  string display_name = 2;
}
// Request for the root categories of info types supported by the API.
// Example values might include "FINANCE", "HEALTH", "FAST", "DEFAULT".
message ListRootCategoriesRequest {
  // Optional language code for localized friendly category names.
  // If omitted, or if localized strings are not available, en-US strings
  // will be returned.
  string language_code = 1;
}
// Response for ListRootCategories request.
message ListRootCategoriesResponse {
  // List of all info type categories supported by the API.
  repeated CategoryDescription categories = 1;
}
// Categorization of results based on how likely they are to represent a match,
// based on the number of elements they contain which imply a match.
// Values are listed from least to most likely.
enum Likelihood {
  // Default value; information with all likelihoods will be included.
  LIKELIHOOD_UNSPECIFIED = 0;

  // Few matching elements.
  VERY_UNLIKELY = 1;

  // Level between VERY_UNLIKELY and POSSIBLE.
  UNLIKELY = 2;

  // Some matching elements.
  POSSIBLE = 3;

  // Level between POSSIBLE and VERY_LIKELY.
  LIKELY = 4;

  // Many matching elements.
  VERY_LIKELY = 5;
}