// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
| |
syntax = "proto3";

package google.privacy.dlp.v2beta1;

import "google/api/annotations.proto";
import "google/longrunning/operations.proto";
import "google/privacy/dlp/v2beta1/storage.proto";
import "google/protobuf/timestamp.proto";

// Code-generation settings for the Go and Java outputs.
option go_package = "google.golang.org/genproto/googleapis/privacy/dlp/v2beta1;dlp";
option java_multiple_files = true;
option java_outer_classname = "DlpProto";
option java_package = "com.google.privacy.dlp.v2beta1";
| |
| |
// The DLP API is a service that lets clients detect Personally Identifiable
// Information (PII) and other privacy-sensitive data in user-supplied,
// unstructured data streams such as text blocks or images.
// The service also offers methods for redacting sensitive data and for
// scheduling scans of data sets hosted on Google Cloud Platform.
service DlpService {
  // Finds potentially sensitive info in a list of strings.
  // Input size, processing time, and output size are all limited.
  rpc InspectContent(InspectContentRequest) returns (InspectContentResponse) {
    option (google.api.http) = {
      post: "/v2beta1/content:inspect"
      body: "*"
    };
  }

  // Redacts potentially sensitive info from a list of strings.
  // Input size, processing time, and output size are all limited.
  rpc RedactContent(RedactContentRequest) returns (RedactContentResponse) {
    option (google.api.http) = {
      post: "/v2beta1/content:redact"
      body: "*"
    };
  }

  // Schedules a job that scans content in a Google Cloud Platform data
  // repository.
  rpc CreateInspectOperation(CreateInspectOperationRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2beta1/inspect/operations"
      body: "*"
    };
  }

  // Returns the list of results for the given inspect operation result set
  // id.
  rpc ListInspectFindings(ListInspectFindingsRequest)
      returns (ListInspectFindingsResponse) {
    option (google.api.http) = {
      get: "/v2beta1/{name=inspect/results/*}/findings"
    };
  }

  // Returns the sensitive information types for the given category.
  rpc ListInfoTypes(ListInfoTypesRequest) returns (ListInfoTypesResponse) {
    option (google.api.http) = {
      get: "/v2beta1/rootCategories/{category=*}/infoTypes"
    };
  }

  // Returns the list of root categories of sensitive information.
  rpc ListRootCategories(ListRootCategoriesRequest)
      returns (ListRootCategoriesResponse) {
    option (google.api.http) = {
      get: "/v2beta1/rootCategories"
    };
  }
}
| |
// Settings that control how content is scanned.
// For redactContent, only info_types and min_likelihood are currently used.
message InspectConfig {
  // Info types to look for. Values must match InfoType values returned by
  // ListInfoTypes or listed in the documentation. When empty, all enabled
  // detectors run.
  repeated InfoType info_types = 1;

  // Only findings at or above this likelihood threshold are returned.
  Likelihood min_likelihood = 2;

  // Upper bound on the number of findings per content item.
  int32 max_findings = 3;

  // If true, the response includes a contextual quote from the data that
  // triggered each finding; see Finding.quote.
  bool include_quote = 4;

  // If true, type information is left out of the findings.
  bool exclude_types = 6;
}
| |
// Wrapper around a piece of content to inspect.
message ContentItem {
  // Content type, as it would appear in a Content-Type HTTP header.
  // Supported types: all "text" types, octet streams, PNG images, and
  // JPEG images.
  string type = 1;

  // The item's payload, given either as a byte array or as a UTF-8 string.
  oneof data_item {
    // Content data to inspect or redact.
    bytes data = 2;

    // String data to inspect or redact.
    string value = 3;
  }
}
| |
// The complete set of findings for one scanned item.
message InspectResult {
  // Findings for the item.
  repeated Finding findings = 1;

  // When true, the item may have more findings than were returned, and the
  // returned findings are an arbitrary subset of all findings. Truncation
  // can happen because the input items were too large, or because the server
  // hit the maximum amount of resources allowed for a single API call.
  // For best results, split the input into smaller batches.
  bool findings_truncated = 2;
}
| |
// Describes one finding within a string or image.
message Finding {
  // The exact string that may be sensitive info.
  string quote = 1;

  // The kind of info the string might be.
  InfoType info_type = 2;

  // Estimated likelihood that info_type is correct.
  Likelihood likelihood = 3;

  // Where the info was found.
  Location location = 4;

  // Time at which the finding was detected.
  google.protobuf.Timestamp create_time = 6;
}
| |
// Where a finding sits within its source item.
message Location {
  // Zero-based byte offsets within a content item.
  Range byte_range = 1;

  // Character offsets within a content item. Included when the content type
  // is text. The charset is assumed to be UTF-8 by default.
  Range codepoint_range = 2;

  // Locations within an image's pixels.
  repeated ImageLocation image_boxes = 3;

  // Key of the finding.
  RecordKey record_key = 4;

  // Field id of the field that contains the finding.
  FieldId field_id = 5;
}
| |
// A generic half-open interval [start, end).
message Range {
  // Start index of the range (inclusive).
  int64 start = 1;

  // End index of the range (exclusive).
  int64 end = 2;
}
| |
// A bounding box around text detected in an image.
// The origin (0,0) is the upper-left corner.
message ImageLocation {
  // Top coordinate of the bounding box.
  int32 top = 1;

  // Left coordinate of the bounding box.
  int32 left = 2;

  // Bounding box width, in pixels.
  int32 width = 3;

  // Bounding box height, in pixels.
  int32 height = 4;
}
| |
// Request to find potentially sensitive info in a list of items and replace
// it with default or caller-provided content.
message RedactContentRequest {
  // Pairs an info type with the text that replaces findings of that type.
  message ReplaceConfig {
    // Type of information to replace. Provide at most one ReplaceConfig per
    // info_type. A ReplaceConfig without an info_type is matched against
    // every info_type that is found but not named in another ReplaceConfig.
    InfoType info_type = 1;

    // Text substituted for sensitive information of the given type.
    // Max 256 chars.
    string replace_with = 2;
  }

  // Configuration for the inspector.
  InspectConfig inspect_config = 1;

  // Items to inspect. Up to 100 are allowed per request.
  repeated ContentItem items = 2;

  // Replacement strings for findings. At least one must be specified.
  repeated ReplaceConfig replace_configs = 3;
}
| |
// Outcome of de-identifying a list of items.
message RedactContentResponse {
  // The items after redaction.
  repeated ContentItem items = 1;
}
| |
// Request to find potentially sensitive info in a list of items.
message InspectContentRequest {
  // Configuration for the inspector.
  InspectConfig inspect_config = 1;

  // Items to inspect. Up to 100 are allowed per request. Items sent in the
  // same request are considered "related" unless
  // inspect_config.independent_inputs is true.
  // NOTE(review): InspectConfig as declared in this file has no
  // independent_inputs field; confirm whether this reference is stale.
  repeated ContentItem items = 2;
}
| |
// Outcome of inspecting a list of items.
message InspectContentResponse {
  // One result per content_item in the request, in the same order as the
  // request.
  repeated InspectResult results = 1;
}
| |
// Request to schedule a scan of a data subset from a Google Platform data
// repository.
message CreateInspectOperationRequest {
  // Configuration for the inspector.
  InspectConfig inspect_config = 1;

  // Describes the data set to process.
  StorageConfig storage_config = 2;

  // Optional location to store findings. The bucket must already exist, and
  // the Google APIs service account for DLP must have write permission on it.
  //
  // Results are split across multiple CSV files, each named after the pattern
  // "[operation_id] + [count].csv". The operation_id matches the identifier
  // of the Operation, and [count] is a counter tracking the number of files
  // written.
  //
  // Whatever the scanned storage type, the CSV file(s) contain these columns:
  // id, info_type, likelihood, byte size of finding, quote, time_stamp
  // For cloud storage the additional columns are: file_path, start_offset
  // For datastore the additional columns are: project_id, namespace_id, path,
  // column_name, offset.
  OutputStorageConfig output_config = 3;
}
| |
// A cloud repository in which output is stored.
message OutputStorageConfig {
  // The kind of output location; a Cloud Storage path is the only option
  // declared here.
  oneof type {
    // Path to a Google Storage location where output is stored.
    CloudStoragePath storage_path = 2;
  }
}
| |
// Statistics about one specific InfoType.
message InfoTypeStatistics {
  // The info type this statistic describes.
  InfoType info_type = 1;

  // How many findings there are for this info type.
  int64 count = 2;
}
| |
// Metadata returned by GetOperation for an inspect request.
message InspectOperationMetadata {
  // Total number of bytes processed.
  int64 processed_bytes = 1;

  // Estimated number of bytes to process.
  int64 total_estimated_bytes = 4;

  // Statistics broken down by info type; see InfoTypeStatistics.
  repeated InfoTypeStatistics info_type_stats = 2;

  // Time at which this request was started.
  google.protobuf.Timestamp create_time = 3;

  // The inspect config the Operation was created with.
  InspectConfig request_inspect_config = 5;

  // The storage config the Operation was created with.
  StorageConfig request_storage_config = 6;

  // Optional location to store findings.
  OutputStorageConfig request_output_config = 7;
}
| |
// The operational data.
message InspectOperationResult {
  // Server-assigned name, unique only within the service that originally
  // returns it. With the default HTTP mapping, `name` has the format
  // `inspect/results/{id}`.
  string name = 1;
}
| |
// Request to list the results of a given inspect operation.
message ListInspectFindingsRequest {
  // Identifier of the result set, returned as metadata of the longrunning
  // operation created by a call to CreateInspectOperation.
  // Should have the format `inspect/results/{id}`.
  string name = 1;

  // Maximum number of results to return.
  // When 0, the implementation picks a reasonable value.
  int32 page_size = 2;

  // The value returned by the last `ListInspectFindingsResponse`. Marks this
  // call as a continuation of a prior `ListInspectFindings` call, so the
  // system should return the next page of data.
  string page_token = 3;
}
| |
// Response to a ListInspectFindings request.
message ListInspectFindingsResponse {
  // The results.
  InspectResult result = 1;

  // When not empty, more results matching the request may exist; pass this
  // value in a new `ListInspectFindingsRequest` to fetch them.
  string next_page_token = 2;
}
| |
// Describes an info type.
message InfoTypeDescription {
  // Internal name of the info type.
  string name = 1;

  // Human-readable form of the info type name.
  string display_name = 2;

  // Categories this info type belongs to.
  repeated CategoryDescription categories = 3;
}
| |
// Request to list the info types in a given category, or every supported
// info type when no category is specified.
message ListInfoTypesRequest {
  // Category name, as returned by ListRootCategories.
  string category = 1;

  // Optional BCP-47 language code used to localize info type friendly names.
  // en-US strings are returned when this is omitted or when localized
  // strings are not available.
  string language_code = 2;
}
| |
// Response to a ListInfoTypes request.
message ListInfoTypesResponse {
  // The sensitive info types that belong to a category.
  repeated InfoTypeDescription info_types = 1;
}
| |
// Describes an info type category.
message CategoryDescription {
  // Internal name of the category.
  string name = 1;

  // Human-readable form of the category name.
  string display_name = 2;
}
| |
// Request for the root categories of info types supported by the API.
// Example values might include "FINANCE", "HEALTH", "FAST", "DEFAULT".
message ListRootCategoriesRequest {
  // Optional language code used to localize friendly category names.
  // en-US strings are returned when this is omitted or when localized
  // strings are not available.
  string language_code = 1;
}
| |
// Response to a ListRootCategories request.
message ListRootCategoriesResponse {
  // All info type categories supported by the API.
  repeated CategoryDescription categories = 1;
}
| |
// Buckets results by how likely they are to represent a match, judged by the
// number of elements they contain that imply a match.
enum Likelihood {
  // Default value; information with all likelihoods will be included.
  LIKELIHOOD_UNSPECIFIED = 0;

  // Few matching elements.
  VERY_UNLIKELY = 1;

  // Ranked between VERY_UNLIKELY and POSSIBLE.
  UNLIKELY = 2;

  // Some matching elements.
  POSSIBLE = 3;

  // Ranked between POSSIBLE and VERY_LIKELY.
  LIKELY = 4;

  // Many matching elements.
  VERY_LIKELY = 5;
}