| // Copyright 2016 Google Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| syntax = "proto3"; |
| |
| package google.genomics.v1; |
| |
| import "google/api/annotations.proto"; |
| import "google/genomics/v1/range.proto"; |
| import "google/genomics/v1/readalignment.proto"; |
| import "google/genomics/v1/readgroupset.proto"; |
| import "google/longrunning/operations.proto"; |
| import "google/protobuf/empty.proto"; |
| import "google/protobuf/field_mask.proto"; |
| |
| option cc_enable_arenas = true; |
| option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics"; |
| option java_multiple_files = true; |
| option java_outer_classname = "ReadsProto"; |
| option java_package = "com.google.genomics.v1"; |
| |
| |
// Service exposing a server-streaming RPC for reads.
service StreamingReadService {
  // Streams back every read that matches the search request. Results are
  // ordered by reference name, then position, then ID.
  rpc StreamReads(StreamReadsRequest) returns (stream StreamReadsResponse) {
    option (google.api.http) = {
      post: "/v1/reads:stream"
      body: "*"
    };
  }
}
| |
// The Readstore: a data store for DNA sequencing Reads.
service ReadServiceV1 {
  // Asynchronously imports the provided information, creating read group
  // sets from it.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // WRITE permissions to the dataset are required of the caller.
  //
  // ## Notes on [BAM](https://samtools.github.io/hts-specs/SAMv1.pdf) import
  //
  // - Tag types are not preserved: tags will be converted to strings
  // - Comments (`@CO`) found in the input file header will not be preserved
  // - The original header order of references (`@SQ`) will not be preserved
  // - Reverse stranded unmapped reads will be reverse complemented, and
  //   their qualities (plus the "BQ" and "OQ" tags, if any) will be reversed
  // - Positional information (reference name and position) will be stripped
  //   from unmapped reads
  rpc ImportReadGroupSets(ImportReadGroupSetsRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/readgroupsets:import"
      body: "*"
    };
  }

  // Writes a read group set out as a BAM file in Google Cloud Storage.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // An exported BAM file may currently differ in some respects from the
  // original BAM file at the time of import. The caveats are listed under
  // [ImportReadGroupSets][google.genomics.v1.ReadServiceV1.ImportReadGroupSets].
  rpc ExportReadGroupSet(ExportReadGroupSetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/readgroupsets/{read_group_set_id}:export"
      body: "*"
    };
  }

  // Finds the read group sets that match the given criteria.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.searchReadGroupSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/readmethods.avdl#L135).
  rpc SearchReadGroupSets(SearchReadGroupSetsRequest)
      returns (SearchReadGroupSetsResponse) {
    option (google.api.http) = {
      post: "/v1/readgroupsets/search"
      body: "*"
    };
  }

  // Modifies a read group set.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Patch semantics are supported by this method.
  rpc UpdateReadGroupSet(UpdateReadGroupSetRequest) returns (ReadGroupSet) {
    option (google.api.http) = {
      patch: "/v1/readgroupsets/{read_group_set_id}"
      body: "read_group_set"
    };
  }

  // Removes a read group set.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc DeleteReadGroupSet(DeleteReadGroupSetRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/readgroupsets/{read_group_set_id}"
    };
  }

  // Looks up a read group set by its ID.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc GetReadGroupSet(GetReadGroupSetRequest) returns (ReadGroupSet) {
    option (google.api.http) = {
      get: "/v1/readgroupsets/{read_group_set_id}"
    };
  }

  // Lists the fixed width coverage buckets of a read group set. Each bucket
  // corresponds to a range of a reference sequence and summarizes coverage
  // information across that genomic range.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Coverage is defined as the number of reads which are aligned to a given
  // base in the reference sequence. Buckets are available at several
  // precomputed bucket widths, which enables retrieval of various coverage
  // 'zoom levels'. READ permissions for the target read group set are
  // required of the caller.
  rpc ListCoverageBuckets(ListCoverageBucketsRequest)
      returns (ListCoverageBucketsResponse) {
    option (google.api.http) = {
      get: "/v1/readgroupsets/{read_group_set_id}/coveragebuckets"
    };
  }

  // Retrieves a list of reads for one or more read group sets.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // The search runs over a genomic coordinate space of reference sequence &
  // position, defined over the reference sequences to which the requested
  // read group sets are aligned.
  //
  // When a target positional range is specified, every read whose alignment
  // to the reference genome overlaps the range is returned. A query that
  // specifies only read group set IDs yields all reads in those read group
  // sets, unmapped reads included.
  //
  // All returned reads (including those on subsequent pages) are ordered by
  // genomic coordinate (reference sequence first, then position). The order
  // of reads with equivalent genomic coordinates is unspecified but
  // consistent: two queries for the same content, regardless of page size,
  // yield reads in the same order across their respective streams of
  // paginated responses.
  //
  // Implements
  // [GlobalAllianceApi.searchReads](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/readmethods.avdl#L85).
  rpc SearchReads(SearchReadsRequest) returns (SearchReadsResponse) {
    option (google.api.http) = {
      post: "/v1/reads/search"
      body: "*"
    };
  }
}
| |
// Request message for searching read group sets.
message SearchReadGroupSetsRequest {
  // Limits the query to read group sets contained in the listed datasets. At
  // least one ID is required.
  repeated string dataset_ids = 1;

  // Return only those read group sets whose name contains a substring
  // matching this string.
  string name = 3;

  // Continuation token for paging through large result sets. Set it to the
  // `nextPageToken` value of the previous response to fetch the next page of
  // results.
  string page_token = 2;

  // Upper bound on the number of results returned in a single page. Defaults
  // to 256 when unspecified; the maximum value is 1024.
  int32 page_size = 4;
}
| |
// Response message for a read group set search.
message SearchReadGroupSetsResponse {
  // The read group sets that matched the search.
  repeated ReadGroupSet read_group_sets = 1;

  // Continuation token for paging through large result sets. Pass it in a
  // subsequent request to retrieve the next page of results. Empty when there
  // are no additional results.
  string next_page_token = 2;
}
| |
// Request message for importing read group sets.
message ImportReadGroupSetsRequest {
  // How read groups are partitioned into read group sets on import.
  enum PartitionStrategy {
    // No partition strategy was specified.
    PARTITION_STRATEGY_UNSPECIFIED = 0;

    // This is the default behavior; in most cases it yields one read group
    // set per file.
    //
    // One read group set is allocated per file per sample. For BAM files,
    // read groups with identical sample names are considered to share a
    // sample. In addition, any reads in a file that do not belong to a read
    // group will be grouped into a single read group set for that file.
    PER_FILE_PER_SAMPLE = 1;

    // All read groups from all imported files are placed into a single read
    // group set. The headers of all imported files must be equivalent. Any
    // reads that do not belong to a read group will be grouped into a
    // separate read group set.
    MERGE_ALL = 2;
  }

  // Required. ID of the dataset that the imported read group sets will
  // belong to. WRITE permissions to this dataset are required of the caller.
  string dataset_id = 1;

  // The reference set, if any, to which the imported read group sets are
  // aligned. Its reference names must be a superset of those found in the
  // imported file headers. When no reference set id is provided, a best
  // effort is made to associate with a matching reference set.
  string reference_set_id = 4;

  // URIs pointing at [BAM
  // files](https://samtools.github.io/hts-specs/SAMv1.pdf)
  // in Google Cloud Storage.
  // Wildcards (*) are allowed in these URIs, but matching files must not be
  // added or removed before the import has completed.
  //
  // Google Cloud Storage object listing is only eventually consistent, so
  // newly added files may not be immediately visible to everyone. When using
  // a wildcard it is therefore preferable not to start the import
  // immediately after the files are created.
  repeated string source_uris = 2;

  // Describes how read groups are partitioned into read group sets.
  PartitionStrategy partition_strategy = 5;
}
| |
// Response message for a read group set import.
message ImportReadGroupSetsResponse {
  // The IDs of the read group sets created by the import.
  repeated string read_group_set_ids = 1;
}
| |
// Request message for exporting a read group set.
message ExportReadGroupSetRequest {
  // Required. ID of the Google Cloud project that owns this export. WRITE
  // access to the project is required of the caller.
  string project_id = 1;

  // Required. Google Cloud Storage URI for the exported BAM file. The
  // currently authenticated user needs write access to the new file. If the
  // URI already contains data, an error will be returned.
  string export_uri = 2;

  // Required. ID of the read group set being exported. READ access to the
  // read group set is required of the caller.
  string read_group_set_id = 3;

  // Names of the references to export. When unspecified, all reference
  // sequences are exported, unmapped reads included. Specify `*` to export
  // only unmapped reads.
  repeated string reference_names = 4;
}
| |
// Request message for updating a read group set.
message UpdateReadGroupSetRequest {
  // ID of the read group set to update. WRITE permissions to the dataset
  // associated with this read group set are required of the caller.
  string read_group_set_id = 1;

  // Replacement read group set data. Field mutability is described under
  // `updateMask`.
  ReadGroupSet read_group_set = 2;

  // Optional mask naming the fields to update. The supported fields are:
  //
  // * [name][google.genomics.v1.ReadGroupSet.name].
  // * [referenceSetId][google.genomics.v1.ReadGroupSet.reference_set_id].
  //
  // An unset `updateMask` is equivalent to one that specifies all mutable
  // fields.
  google.protobuf.FieldMask update_mask = 3;
}
| |
// Request message for deleting a read group set.
message DeleteReadGroupSetRequest {
  // ID of the read group set to delete. WRITE permissions to the dataset
  // associated with this read group set are required of the caller.
  string read_group_set_id = 1;
}
| |
// Request message for fetching a single read group set.
message GetReadGroupSetRequest {
  // ID of the read group set.
  string read_group_set_id = 1;
}
| |
// Request message for listing the coverage buckets of a read group set.
message ListCoverageBucketsRequest {
  // Required. ID of the read group set whose coverage is requested.
  string read_group_set_id = 1;

  // Optional. Name of the reference to query, within the reference set
  // associated with this query.
  string reference_name = 3;

  // Start position of the range on the reference, 0-based inclusive.
  // Defaults to 0. Specifying it requires `referenceName` to be specified as
  // well.
  int64 start = 4;

  // End position of the range on the reference, 0-based exclusive.
  // Specifying it requires `referenceName` to be specified as well. When
  // unset or 0, defaults to the length of the reference.
  int64 end = 5;

  // Desired width, in base pairs, of each reported coverage bucket. The value
  // is rounded down to the nearest precomputed bucket width, and that width
  // is returned as `bucketWidth` in the response. Defaults to infinity (each
  // bucket spans an entire reference sequence) or, if a target range is
  // specified, to the length of that range. The smallest precomputed
  // `bucketWidth` is currently 2048 base pairs; this is subject to change.
  int64 target_bucket_width = 6;

  // Continuation token for paging through large result sets. Set it to the
  // `nextPageToken` value of the previous response to fetch the next page of
  // results.
  string page_token = 7;

  // Upper bound on the number of results returned in a single page. Defaults
  // to 1024 when unspecified; the maximum value is 2048.
  int32 page_size = 8;
}
| |
// A bucket for which read coverage has been precomputed. Each bucket
// corresponds to a specific range of the reference sequence.
message CoverageBucket {
  // Genomic coordinate range that this bucket spans.
  Range range = 1;

  // Average number of reads aligned to each individual reference base in
  // this bucket.
  float mean_coverage = 2;
}
| |
// Response message for listing coverage buckets.
message ListCoverageBucketsResponse {
  // Length, in base pairs, of each coverage bucket. Buckets at the end of a
  // reference sequence may be shorter. Omitted when the bucket width is
  // infinity (the default behavior, with no range or `targetBucketWidth`).
  int64 bucket_width = 1;

  // The coverage buckets. The list is sparse: a bucket with 0 overlapping
  // reads is not returned. No bucket crosses more than one reference
  // sequence. Every bucket has width `bucketWidth` unless its end is the end
  // of the reference sequence.
  repeated CoverageBucket coverage_buckets = 2;

  // Continuation token for paging through large result sets. Pass it in a
  // subsequent request to retrieve the next page of results. Empty when there
  // are no additional results.
  string next_page_token = 3;
}
| |
// Request message for searching reads.
message SearchReadsRequest {
  // IDs of the read group sets to search for reads. Every specified read
  // group set must be aligned against a common set of reference sequences;
  // this defines the genomic coordinates for the query. One of
  // `readGroupSetIds` or `readGroupIds` must be specified.
  repeated string read_group_set_ids = 1;

  // IDs of the read groups to search for reads. Every specified read group
  // must belong to the same read group sets. One of `readGroupSetIds` or
  // `readGroupIds` must be specified.
  repeated string read_group_ids = 5;

  // Reference sequence name, for example `chr1`, `1`, or `chrX`. A value of
  // `*` returns only unmapped reads. When unspecified, all reads (mapped and
  // unmapped) are returned.
  string reference_name = 7;

  // Start position of the range on the reference, 0-based inclusive.
  // Specifying it requires `referenceName` to be specified as well.
  int64 start = 8;

  // End position of the range on the reference, 0-based exclusive.
  // Specifying it requires `referenceName` to be specified as well.
  int64 end = 9;

  // Continuation token for paging through large result sets. Set it to the
  // `nextPageToken` value of the previous response to fetch the next page of
  // results.
  string page_token = 3;

  // Upper bound on the number of results returned in a single page. Defaults
  // to 256 when unspecified; the maximum value is 2048.
  int32 page_size = 4;
}
| |
// Response message for a read search.
message SearchReadsResponse {
  // The matching alignments, sorted by mapped genomic coordinate (if any) and
  // ascending in position within the same reference. Unmapped reads have no
  // position; they are returned contiguously, sorted in ascending
  // lexicographic order by fragment name.
  repeated Read alignments = 1;

  // Continuation token for paging through large result sets. Pass it in a
  // subsequent request to retrieve the next page of results. Empty when there
  // are no additional results.
  string next_page_token = 2;
}
| |
// Request message for streaming reads.
message StreamReadsRequest {
  // Required. ID of the Google Cloud project that will be billed for this
  // access. WRITE access to the project is required of the caller.
  string project_id = 1;

  // ID of the read group set to stream reads from.
  string read_group_set_id = 2;

  // Reference sequence name, for example `chr1`, `1`, or `chrX`. A value of
  // `*` returns only unmapped reads.
  string reference_name = 3;

  // Start position of the range on the reference, 0-based inclusive.
  // Specifying it requires `referenceName` to be specified as well.
  int64 start = 4;

  // End position of the range on the reference, 0-based exclusive.
  // Specifying it requires `referenceName` to be specified as well.
  int64 end = 5;

  // Limits the results to one shard holding approximately `1/totalShards` of
  // the normal response payload for this query. The results of a sharded
  // request are disjoint from those returned by all queries which differ only
  // in their shard parameter. A shard may yield 0 results, which is
  // especially likely for large values of `totalShards`.
  //
  // Valid values are `[0, totalShards)`.
  int32 shard = 6;

  // When `totalShards` is specified, each query with a unique `shard`
  // parameter returns a disjoint subset of the normal response payload. A
  // best effort is made to yield equally sized shards. This allows processing
  // to be distributed amongst workers: every worker specifies the same
  // `totalShards` number and is assigned a unique `shard` number. The union
  // of reads returned for all sharded queries `[0, totalShards)` is equal to
  // those returned by a single unsharded query.
  //
  // Shard boundaries are shared between queries whose `totalShards` values
  // have common divisors. For example, streaming `shard` 2 of 5
  // `totalShards` yields the same results as streaming `shard`s 4 and 5 of 10
  // `totalShards`. This property can be leveraged for adaptive retries.
  int32 total_shards = 7;
}
| |
// Response message carried on the read stream.
message StreamReadsResponse {
  // Reads delivered in this message of the response stream.
  repeated Read alignments = 1;
}