 // third_party/googleapis/google/genomics/v1/annotations.proto - bazel - Git at Google

 // Copyright 2016 Google Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 syntax = "proto3";

 package google.genomics.v1;

 import "google/api/annotations.proto";
 import "google/protobuf/empty.proto";
 import "google/protobuf/field_mask.proto";
 import "google/protobuf/struct.proto";
 import "google/protobuf/wrappers.proto";
 import "google/rpc/status.proto";

 option cc_enable_arenas = true;
 option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
 option java_multiple_files = true;
 option java_outer_classname = "AnnotationsProto";
 option java_package = "com.google.genomics.v1";


 // This service provides storage and positional retrieval of genomic
 // reference annotations, including variant annotations.
 //
 // Every RPC carries a `google.api.http` option that maps it onto a REST
 // endpoint under `/v1/annotationsets` or `/v1/annotations`.
 service AnnotationServiceV1 {
   // Creates a new annotation set. Caller must have WRITE permission for the
   // associated dataset.
   //
   // The following fields are required:
   //
   // * [datasetId][google.genomics.v1.AnnotationSet.dataset_id]
   // * [referenceSetId][google.genomics.v1.AnnotationSet.reference_set_id]
   //
   // All other fields may be optionally specified, unless documented as being
   // server-generated (for example, the `id` field).
   rpc CreateAnnotationSet(CreateAnnotationSetRequest) returns (AnnotationSet) {
     // Only the `annotation_set` field of the request is sent as the HTTP body.
     option (google.api.http) = { post: "/v1/annotationsets" body: "annotation_set" };
   }

   // Gets an annotation set. Caller must have READ permission for
   // the associated dataset.
   rpc GetAnnotationSet(GetAnnotationSetRequest) returns (AnnotationSet) {
     // `annotation_set_id` is taken from the URL path.
     option (google.api.http) = { get: "/v1/annotationsets/{annotation_set_id}" };
   }

   // Updates an annotation set. The update must respect all mutability
   // restrictions and other invariants described on the annotation set resource.
   // Caller must have WRITE permission for the associated dataset.
   //
   // The set of fields to write can be narrowed with
   // [update_mask][google.genomics.v1.UpdateAnnotationSetRequest.update_mask].
   rpc UpdateAnnotationSet(UpdateAnnotationSetRequest) returns (AnnotationSet) {
     // `annotation_set_id` is taken from the URL path; the `annotation_set`
     // field of the request is sent as the HTTP body.
     option (google.api.http) = { put: "/v1/annotationsets/{annotation_set_id}" body: "annotation_set" };
   }

   // Deletes an annotation set. Caller must have WRITE permission
   // for the associated annotation set.
   rpc DeleteAnnotationSet(DeleteAnnotationSetRequest) returns (google.protobuf.Empty) {
     option (google.api.http) = { delete: "/v1/annotationsets/{annotation_set_id}" };
   }

   // Searches for annotation sets that match the given criteria. Annotation sets
   // are returned in an unspecified order. This order is consistent, such that
   // two queries for the same content (regardless of page size) yield annotation
   // sets in the same order across their respective streams of paginated
   // responses. Caller must have READ permission for the queried datasets.
   rpc SearchAnnotationSets(SearchAnnotationSetsRequest) returns (SearchAnnotationSetsResponse) {
     // `body: "*"` sends the whole request message as the HTTP body.
     option (google.api.http) = { post: "/v1/annotationsets/search" body: "*" };
   }

   // Creates a new annotation. Caller must have WRITE permission
   // for the associated annotation set.
   //
   // The following fields are required:
   //
   // * [annotationSetId][google.genomics.v1.Annotation.annotation_set_id]
   // * [referenceName][google.genomics.v1.Annotation.reference_name] or
   //   [referenceId][google.genomics.v1.Annotation.reference_id]
   //
   // ### Transcripts
   //
   // For annotations of type TRANSCRIPT, the following fields of
   // [transcript][google.genomics.v1.Annotation.transcript] must be provided:
   //
   // * [exons.start][google.genomics.v1.Transcript.Exon.start]
   // * [exons.end][google.genomics.v1.Transcript.Exon.end]
   //
   // All other fields may be optionally specified, unless documented as being
   // server-generated (for example, the `id` field). The annotated
   // range must be no longer than 100Mbp (mega base pairs). See the
   // [Annotation resource][google.genomics.v1.Annotation]
   // for additional restrictions on each field.
   rpc CreateAnnotation(CreateAnnotationRequest) returns (Annotation) {
     // Only the `annotation` field of the request is sent as the HTTP body.
     option (google.api.http) = { post: "/v1/annotations" body: "annotation" };
   }

   // Creates one or more new annotations atomically. All annotations must
   // belong to the same annotation set. Caller must have WRITE
   // permission for this annotation set. For optimal performance, batch
   // positionally adjacent annotations together.
   //
   // If the request has a systemic issue, such as an attempt to write to
   // an inaccessible annotation set, the entire RPC will fail accordingly. For
   // lesser data issues, when possible an error will be isolated to the
   // corresponding batch entry in the response; the remaining well formed
   // annotations will be created normally.
   //
   // For details on the requirements for each individual annotation resource,
   // see
   // [CreateAnnotation][google.genomics.v1.AnnotationServiceV1.CreateAnnotation].
   rpc BatchCreateAnnotations(BatchCreateAnnotationsRequest) returns (BatchCreateAnnotationsResponse) {
     // `:batchCreate` is a custom verb on the collection path; `body: "*"`
     // sends the whole request message as the HTTP body.
     option (google.api.http) = { post: "/v1/annotations:batchCreate" body: "*" };
   }

   // Gets an annotation. Caller must have READ permission
   // for the associated annotation set.
   rpc GetAnnotation(GetAnnotationRequest) returns (Annotation) {
     // `annotation_id` is taken from the URL path.
     option (google.api.http) = { get: "/v1/annotations/{annotation_id}" };
   }

   // Updates an annotation. Caller must have
   // WRITE permission for the associated dataset.
   //
   // The set of fields to write can be narrowed with
   // [update_mask][google.genomics.v1.UpdateAnnotationRequest.update_mask].
   rpc UpdateAnnotation(UpdateAnnotationRequest) returns (Annotation) {
     // `annotation_id` is taken from the URL path; the `annotation` field of
     // the request is sent as the HTTP body.
     option (google.api.http) = { put: "/v1/annotations/{annotation_id}" body: "annotation" };
   }

   // Deletes an annotation. Caller must have WRITE permission for
   // the associated annotation set.
   rpc DeleteAnnotation(DeleteAnnotationRequest) returns (google.protobuf.Empty) {
     option (google.api.http) = { delete: "/v1/annotations/{annotation_id}" };
   }

   // Searches for annotations that match the given criteria. Results are
   // ordered by genomic coordinate (by reference sequence, then position).
   // Annotations with equivalent genomic coordinates are returned in an
   // unspecified order. This order is consistent, such that two queries for the
   // same content (regardless of page size) yield annotations in the same order
   // across their respective streams of paginated responses. Caller must have
   // READ permission for the queried annotation sets.
   rpc SearchAnnotations(SearchAnnotationsRequest) returns (SearchAnnotationsResponse) {
     // `body: "*"` sends the whole request message as the HTTP body.
     option (google.api.http) = { post: "/v1/annotations/search" body: "*" };
   }
 }

 // An annotation set is a logical grouping of annotations that share consistent
 // type information and provenance. Examples of annotation sets include 'all
 // genes from refseq', and 'all variant annotations from ClinVar'.
 message AnnotationSet {
   // The server-generated annotation set ID, unique across all annotation sets.
   string id = 1;

   // The dataset to which this annotation set belongs.
   string dataset_id = 2;

   // The ID of the reference set that defines the coordinate space for this
   // set's annotations.
   string reference_set_id = 3;

   // The display name for this annotation set.
   string name = 4;

   // The source URI describing the file from which this annotation set was
   // generated, if any.
   string source_uri = 5;

   // The type of annotations contained within this set. If not specified when
   // the set is created, it is set to `GENERIC` (see
   // [AnnotationType][google.genomics.v1.AnnotationType]).
   AnnotationType type = 6;

   // A map of additional information about this annotation set. This must be
   // of the form map<string, string[]> (string key mapping to a list of string
   // values).
   //
   // Note: the field number (17) is not contiguous with the fields above. It
   // is part of the wire format and must not be renumbered.
   map<string, google.protobuf.ListValue> info = 17;
 }

 // An annotation describes a region of a reference genome. The value of an
 // annotation may be one of several canonical types, supplemented by arbitrary
 // info tags. An annotation is not inherently associated with a specific
 // sample or individual (though a client could choose to use annotations in
 // this way). Example canonical annotation types are `GENE` and
 // `VARIANT`.
 message Annotation {
   // The server-generated annotation ID, unique across all annotations.
   string id = 1;

   // The annotation set to which this annotation belongs.
   string annotation_set_id = 2;

   // The display name of this annotation.
   string name = 3;

   // The ID of the Google Genomics reference associated with this range.
   string reference_id = 4;

   // The display name corresponding to the reference specified by
   // `referenceId`, for example `chr1`, `1`, or `chrX`.
   string reference_name = 5;

   // The start position of the range on the reference, 0-based inclusive.
   int64 start = 6;

   // The end position of the range on the reference, 0-based exclusive.
   int64 end = 7;

   // Whether this range refers to the reverse strand, as opposed to the forward
   // strand. Note that regardless of this field, the start/end position of the
   // range always refer to the forward strand.
   bool reverse_strand = 8;

   // The data type for this annotation. Must match the containing annotation
   // set's type.
   AnnotationType type = 9;

   // The typed payload of this annotation. At most one member can be set at a
   // time; each member is only populated for the annotation `type` named in
   // its own comment.
   oneof value {
     // A variant annotation, which describes the effect of a variant on the
     // genome, the coding sequence, and/or higher level consequences at the
     // organism level e.g. pathogenicity. This field is only set for annotations
     // of type `VARIANT`.
     VariantAnnotation variant = 10;

     // A transcript value represents the assertion that a particular region of
     // the reference genome may be transcribed as RNA. An alternative splicing
     // pattern would be represented as a separate transcript object. This field
     // is only set for annotations of type `TRANSCRIPT`.
     Transcript transcript = 11;
   }

   // A map of additional information about this annotation. This must be of
   // the form map<string, string[]> (string key mapping to a list of string
   // values).
   map<string, google.protobuf.ListValue> info = 12;
 }

 // A variant annotation describes the effect of a variant on the genome, the
 // coding sequence, and/or higher level consequences at the organism level,
 // e.g. pathogenicity. It is carried in
 // [Annotation.variant][google.genomics.v1.Annotation.variant].
 message VariantAnnotation {
   // A condition associated with a variant. A condition describes the way a
   // variant influences human health.
   message ClinicalCondition {
     // A set of names for the condition.
     repeated string names = 1;

     // The set of external IDs for this condition.
     repeated ExternalId external_ids = 2;

     // The MedGen concept id associated with this condition.
     // Search for these IDs at http://www.ncbi.nlm.nih.gov/medgen/
     //
     // NOTE(review): this comment previously said "associated with this gene";
     // given the enclosing message it presumably refers to the condition.
     // Confirm against the service documentation.
     string concept_id = 3;

     // The OMIM id for this condition.
     // Search for these IDs at http://omim.org/
     string omim_id = 4;
   }

   // The kind of variant. Adapted from ClinVar's list of variant types.
   enum Type {
     // Default value; no variant type has been specified.
     TYPE_UNSPECIFIED = 0;

     // `TYPE_OTHER` should be used when no other Type will suffice.
     // Further explanation of the variant type may be included in the
     // [info][google.genomics.v1.Annotation.info] field.
     TYPE_OTHER = 1;

     // `INSERTION` indicates an insertion.
     INSERTION = 2;

     // `DELETION` indicates a deletion.
     DELETION = 3;

     // `SUBSTITUTION` indicates a block substitution of
     // two or more nucleotides.
     SUBSTITUTION = 4;

     // `SNP` indicates a single nucleotide polymorphism.
     SNP = 5;

     // `STRUCTURAL` indicates a large structural variant,
     // including chromosomal fusions, inversions, etc.
     STRUCTURAL = 6;

     // `CNV` indicates a variation in copy number.
     CNV = 7;
   }

   // The effect of a variant on the coding sequence.
   enum Effect {
     // Default value; no effect has been specified.
     EFFECT_UNSPECIFIED = 0;

     // `EFFECT_OTHER` should be used when no other Effect
     // will suffice.
     EFFECT_OTHER = 1;

     // `FRAMESHIFT` indicates a mutation in which the insertion or
     // deletion of nucleotides resulted in a frameshift change.
     FRAMESHIFT = 2;

     // `FRAME_PRESERVING_INDEL` indicates a mutation in which a
     // multiple of three nucleotides has been inserted or deleted, resulting
     // in no change to the reading frame of the coding sequence.
     FRAME_PRESERVING_INDEL = 3;

     // `SYNONYMOUS_SNP` indicates a single nucleotide polymorphism
     // mutation that results in no amino acid change.
     SYNONYMOUS_SNP = 4;

     // `NONSYNONYMOUS_SNP` indicates a single nucleotide
     // polymorphism mutation that results in an amino acid change.
     NONSYNONYMOUS_SNP = 5;

     // `STOP_GAIN` indicates a mutation that leads to the creation
     // of a stop codon at the variant site. Frameshift mutations creating
     // downstream stop codons do not count as `STOP_GAIN`.
     STOP_GAIN = 6;

     // `STOP_LOSS` indicates a mutation that eliminates a
     // stop codon at the variant site.
     STOP_LOSS = 7;

     // `SPLICE_SITE_DISRUPTION` indicates that this variant is
     // found in a splice site for the associated transcript, and alters the
     // normal splicing pattern.
     SPLICE_SITE_DISRUPTION = 8;
   }

   // The clinical significance of a variant. Adapted from the ClinVar
   // controlled vocabulary for clinical significance described at:
   // http://www.ncbi.nlm.nih.gov/clinvar/docs/clinsig/
   enum ClinicalSignificance {
     // Default value; no clinical significance has been specified.
     CLINICAL_SIGNIFICANCE_UNSPECIFIED = 0;

     // `CLINICAL_SIGNIFICANCE_OTHER` should be used when no other clinical
     // significance value will suffice.
     CLINICAL_SIGNIFICANCE_OTHER = 1;

     // The values from `UNCERTAIN` through `PROTECTIVE` carry no individual
     // documentation in this file; refer to the ClinVar vocabulary linked on
     // this enum for the meaning of each term.

     UNCERTAIN = 2;

     BENIGN = 3;

     LIKELY_BENIGN = 4;

     LIKELY_PATHOGENIC = 5;

     PATHOGENIC = 6;

     DRUG_RESPONSE = 7;

     HISTOCOMPATIBILITY = 8;

     CONFERS_SENSITIVITY = 9;

     RISK_FACTOR = 10;

     ASSOCIATION = 11;

     PROTECTIVE = 12;

     // `MULTIPLE_REPORTED` should be used when multiple clinical
     // significances are reported for a variant. The original clinical
     // significance values may be provided in the `info` field.
     MULTIPLE_REPORTED = 13;
   }

   // Type has been adapted from ClinVar's list of variant types.
   Type type = 1;

   // Effect of the variant on the coding sequence.
   Effect effect = 2;

   // The alternate allele for this variant. If multiple alternate alleles
   // exist at this location, create a separate variant for each one, as they
   // may represent distinct conditions.
   string alternate_bases = 3;

   // Google annotation ID of the gene affected by this variant. This should
   // be provided when the variant is created.
   string gene_id = 4;

   // Google annotation IDs of the transcripts affected by this variant. These
   // should be provided when the variant is created.
   repeated string transcript_ids = 5;

   // The set of conditions associated with this variant.
   // A condition describes the way a variant influences human health.
   repeated ClinicalCondition conditions = 6;

   // Describes the clinical significance of a variant.
   // It is adapted from the ClinVar controlled vocabulary for clinical
   // significance described at:
   // http://www.ncbi.nlm.nih.gov/clinvar/docs/clinsig/
   ClinicalSignificance clinical_significance = 7;
 }

 // A transcript represents the assertion that a particular region of the
 // reference genome may be transcribed as RNA.
 message Transcript {
   // One exon of a transcript: a range on the annotation's reference sequence
   // plus an optional reading frame.
   message Exon {
     // The start position of the exon on this annotation's reference sequence,
     // 0-based inclusive. Note that this is relative to the reference start, and
     // *not* the containing annotation start.
     int64 start = 1;

     // The end position of the exon on this annotation's reference sequence,
     // 0-based exclusive. Note that this is relative to the reference start, and
     // *not* the containing annotation start.
     int64 end = 2;

     // The frame of this exon. Contains a value of 0, 1, or 2, which indicates
     // the offset of the first coding base of the exon within the reading frame
     // of the coding DNA sequence, if any. This field is dependent on the
     // strandedness of this annotation (see
     // [Annotation.reverse_strand][google.genomics.v1.Annotation.reverse_strand]).
     // For forward stranded annotations, this offset is relative to the
     // [exon.start][google.genomics.v1.Transcript.Exon.start]. For reverse
     // strand annotations, this offset is relative to the
     // [exon.end][google.genomics.v1.Transcript.Exon.end] `- 1`.
     //
     // Unset if this exon does not intersect the coding sequence. Upon creation
     // of a transcript, the frame must be populated for all or none of the
     // coding exons.
     //
     // A wrapper message is used (rather than a plain `int32`) so that "unset"
     // can be told apart from a frame of 0.
     google.protobuf.Int32Value frame = 3;
   }

   // The range of a transcript's coding sequence on the annotation's reference
   // sequence.
   message CodingSequence {
     // The start of the coding sequence on this annotation's reference sequence,
     // 0-based inclusive. Note that this position is relative to the reference
     // start, and *not* the containing annotation start.
     int64 start = 1;

     // The end of the coding sequence on this annotation's reference sequence,
     // 0-based exclusive. Note that this position is relative to the reference
     // start, and *not* the containing annotation start.
     int64 end = 2;
   }

   // The annotation ID of the gene from which this transcript is transcribed.
   string gene_id = 1;

   // The <a href="http://en.wikipedia.org/wiki/Exon">exons</a> that compose
   // this transcript. This field should be unset for genomes where transcript
   // splicing does not occur, for example prokaryotes.
   //
   // Introns are regions of the transcript that are not included in the
   // spliced RNA product. Though not explicitly modeled here, intron ranges can
   // be deduced; all regions of this transcript that are not exons are introns.
   //
   // Exonic sequences do not necessarily code for a translational product
   // (amino acids). Only the regions of exons bounded by the
   // [codingSequence][google.genomics.v1.Transcript.coding_sequence] correspond
   // to coding DNA sequence.
   //
   // Exons are ordered by start position and may not overlap.
   repeated Exon exons = 2;

   // The range of the coding sequence for this transcript, if any. To determine
   // the exact ranges of coding sequence, intersect this range with those of the
   // [exons][google.genomics.v1.Transcript.exons], if any. If there are any
   // [exons][google.genomics.v1.Transcript.exons], the
   // [codingSequence][google.genomics.v1.Transcript.coding_sequence] must start
   // and end within them.
   //
   // Note that in some cases, the reference genome will not exactly match the
   // observed mRNA transcript e.g. due to variance in the source genome from
   // reference. In these cases,
   // [exon.frame][google.genomics.v1.Transcript.Exon.frame] will not necessarily
   // match the expected reference reading frame and coding exon reference bases
   // cannot necessarily be concatenated to produce the original transcript mRNA.
   CodingSequence coding_sequence = 3;
 }

 // An identifier assigned by an external data source, paired with the name of
 // that source. Used by
 // [ClinicalCondition.external_ids][google.genomics.v1.VariantAnnotation.ClinicalCondition.external_ids].
 message ExternalId {
   // The name of the source of this data.
   string source_name = 1;

   // The id used by the source of this data.
   string id = 2;
 }

 // Request message for
 // [CreateAnnotationSet][google.genomics.v1.AnnotationServiceV1.CreateAnnotationSet].
 message CreateAnnotationSetRequest {
   // The annotation set to create.
   AnnotationSet annotation_set = 1;
 }

 // Request message for
 // [GetAnnotationSet][google.genomics.v1.AnnotationServiceV1.GetAnnotationSet].
 message GetAnnotationSetRequest {
   // The ID of the annotation set to be retrieved.
   string annotation_set_id = 1;
 }

 // Request message for
 // [UpdateAnnotationSet][google.genomics.v1.AnnotationServiceV1.UpdateAnnotationSet].
 message UpdateAnnotationSetRequest {
   // The ID of the annotation set to be updated.
   string annotation_set_id = 1;

   // The new annotation set.
   AnnotationSet annotation_set = 2;

   // An optional mask specifying which fields to update. Mutable fields are
   // [name][google.genomics.v1.AnnotationSet.name],
   // [source_uri][google.genomics.v1.AnnotationSet.source_uri], and
   // [info][google.genomics.v1.AnnotationSet.info]. If unspecified, all
   // mutable fields will be updated.
   google.protobuf.FieldMask update_mask = 3;
 }

 // Request message for
 // [DeleteAnnotationSet][google.genomics.v1.AnnotationServiceV1.DeleteAnnotationSet].
 message DeleteAnnotationSetRequest {
   // The ID of the annotation set to be deleted.
   string annotation_set_id = 1;
 }

 // Request message for
 // [SearchAnnotationSets][google.genomics.v1.AnnotationServiceV1.SearchAnnotationSets].
 message SearchAnnotationSetsRequest {
   // Required. The dataset IDs to search within. Caller must have `READ` access
   // to these datasets.
   repeated string dataset_ids = 1;

   // If specified, only annotation sets associated with the given reference set
   // are returned.
   string reference_set_id = 2;

   // Only return annotation sets for which a substring of the name matches this
   // string (case insensitive).
   string name = 3;

   // If specified, only annotation sets that have any of these types are
   // returned.
   repeated AnnotationType types = 4;

   // The continuation token, which is used to page through large result sets.
   // To get the next page of results, set this parameter to the value of
   // `nextPageToken` from the previous response.
   string page_token = 5;

   // The maximum number of results to return in a single page. If unspecified,
   // defaults to 128. The maximum value is 1024.
   int32 page_size = 6;
 }

 // Response message for
 // [SearchAnnotationSets][google.genomics.v1.AnnotationServiceV1.SearchAnnotationSets].
 message SearchAnnotationSetsResponse {
   // The matching annotation sets.
   repeated AnnotationSet annotation_sets = 1;

   // The continuation token, which is used to page through large result sets.
   // Provide this value in a subsequent request to return the next page of
   // results. This field will be empty if there aren't any additional results.
   string next_page_token = 2;
 }

 // Request message for
 // [CreateAnnotation][google.genomics.v1.AnnotationServiceV1.CreateAnnotation].
 message CreateAnnotationRequest {
   // The annotation to be created.
   Annotation annotation = 1;
 }

 // Request message for
 // [BatchCreateAnnotations][google.genomics.v1.AnnotationServiceV1.BatchCreateAnnotations].
 message BatchCreateAnnotationsRequest {
   // The annotations to be created. At most 4096 can be specified in a single
   // request.
   repeated Annotation annotations = 1;

   // A unique request ID which enables the server to detect duplicated requests.
   // If provided, duplicated requests will result in the same response; if not
   // provided, duplicated requests may result in duplicated data. For a given
   // annotation set, callers should not reuse `request_id`s when writing
   // different batches of annotations - behavior in this case is undefined.
   // A common approach is to use a UUID. For batch jobs where worker crashes are
   // a possibility, consider using some unique variant of a worker or run ID.
   string request_id = 2;
 }

 // Response message for
 // [BatchCreateAnnotations][google.genomics.v1.AnnotationServiceV1.BatchCreateAnnotations].
 message BatchCreateAnnotationsResponse {
   // The outcome of creating one annotation from the batch.
   message Entry {
     // The creation status.
     google.rpc.Status status = 1;

     // The created annotation, if creation was successful.
     Annotation annotation = 2;
   }

   // The resulting per-annotation entries, ordered consistently with the
   // original request.
   repeated Entry entries = 1;
 }

 // Request message for
 // [GetAnnotation][google.genomics.v1.AnnotationServiceV1.GetAnnotation].
 message GetAnnotationRequest {
   // The ID of the annotation to be retrieved.
   string annotation_id = 1;
 }

 // Request message for
 // [UpdateAnnotation][google.genomics.v1.AnnotationServiceV1.UpdateAnnotation].
 message UpdateAnnotationRequest {
   // The ID of the annotation to be updated.
   string annotation_id = 1;

   // The new annotation.
   Annotation annotation = 2;

   // An optional mask specifying which fields to update. Mutable fields are
   // [name][google.genomics.v1.Annotation.name],
   // [variant][google.genomics.v1.Annotation.variant],
   // [transcript][google.genomics.v1.Annotation.transcript], and
   // [info][google.genomics.v1.Annotation.info]. If unspecified, all mutable
   // fields will be updated.
   google.protobuf.FieldMask update_mask = 3;
 }

 // Request message for
 // [DeleteAnnotation][google.genomics.v1.AnnotationServiceV1.DeleteAnnotation].
 message DeleteAnnotationRequest {
   // The ID of the annotation to be deleted.
   string annotation_id = 1;
 }

 // Request message for
 // [SearchAnnotations][google.genomics.v1.AnnotationServiceV1.SearchAnnotations].
 message SearchAnnotationsRequest {
   // Required. The annotation sets to search within. The caller must have
   // `READ` access to these annotation sets.
   // All queried annotation sets must have the same type.
   repeated string annotation_set_ids = 1;

   // Required. `reference_id` or `reference_name` must be set.
   oneof reference {
     // The ID of the reference to query.
     string reference_id = 2;

     // The name of the reference to query, within the reference set associated
     // with this query.
     string reference_name = 3;
   }

   // The start position of the range on the reference, 0-based inclusive. If
   // specified,
   // [referenceId][google.genomics.v1.SearchAnnotationsRequest.reference_id] or
   // [referenceName][google.genomics.v1.SearchAnnotationsRequest.reference_name]
   // must be specified. Defaults to 0.
   int64 start = 4;

   // The end position of the range on the reference, 0-based exclusive. If
   // specified,
   // [referenceId][google.genomics.v1.SearchAnnotationsRequest.reference_id] or
   // [referenceName][google.genomics.v1.SearchAnnotationsRequest.reference_name]
   // must be specified. Defaults to the length of the reference.
   int64 end = 5;

   // The continuation token, which is used to page through large result sets.
   // To get the next page of results, set this parameter to the value of
   // `nextPageToken` from the previous response.
   string page_token = 6;

   // The maximum number of results to return in a single page. If unspecified,
   // defaults to 256. The maximum value is 2048.
   int32 page_size = 7;
 }

 // Response message for
 // [SearchAnnotations][google.genomics.v1.AnnotationServiceV1.SearchAnnotations].
 message SearchAnnotationsResponse {
   // The matching annotations.
   repeated Annotation annotations = 1;

   // The continuation token, which is used to page through large result sets.
   // Provide this value in a subsequent request to return the next page of
   // results. This field will be empty if there aren't any additional results.
   string next_page_token = 2;
 }

 // The kind of data carried by an annotation or annotation set.
 //
 // When an [Annotation][google.genomics.v1.Annotation] or
 // [AnnotationSet][google.genomics.v1.AnnotationSet] is created, if `type` is
 // not specified it will be set to `GENERIC`.
 enum AnnotationType {
   // Default value; no annotation type has been specified.
   ANNOTATION_TYPE_UNSPECIFIED = 0;

   // A `GENERIC` annotation type should be used when no other annotation
   // type will suffice. This represents an untyped annotation of the reference
   // genome.
   GENERIC = 1;

   // A `VARIANT` annotation type.
   VARIANT = 2;

   // A `GENE` annotation type represents the existence of a gene at the
   // associated reference coordinates. The start coordinate is typically the
   // gene's transcription start site and the end is typically the end of the
   // gene's last exon.
   GENE = 3;

   // A `TRANSCRIPT` annotation type represents the assertion that a
   // particular region of the reference genome may be transcribed as RNA.
   TRANSCRIPT = 4;
 }