123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496 |
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";

package google.genomics.v1;

import "google/api/annotations.proto";
import "google/genomics/v1/range.proto";
import "google/genomics/v1/readalignment.proto";
import "google/genomics/v1/readgroupset.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";

option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
option java_multiple_files = true;
option java_outer_classname = "ReadsProto";
option java_package = "com.google.genomics.v1";
// Service that streams reads from a read group set.
service StreamingReadService {
  // Streams every read that matches the search request. The reads are
  // ordered by reference name, then position, then ID.
  rpc StreamReads(StreamReadsRequest) returns (stream StreamReadsResponse) {
    option (google.api.http) = {
      post: "/v1/reads:stream"
      body: "*"
    };
  }
}
// The Readstore: a data store for DNA sequencing reads.
service ReadServiceV1 {
  // Asynchronously imports the provided information to create read group
  // sets.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // WRITE permissions to the dataset are required of the caller.
  //
  // ## Notes on [BAM](https://samtools.github.io/hts-specs/SAMv1.pdf) import
  //
  // - Tag types are not preserved: tags are converted to strings
  // - Comments (`@CO`) from the input file header are not preserved
  // - The original header order of references (`@SQ`) is not preserved
  // - Reverse stranded unmapped reads are reverse complemented, and their
  //   qualities (as well as the "BQ" and "OQ" tags, if any) are reversed
  // - Positional information (reference name and position) is stripped from
  //   unmapped reads
  rpc ImportReadGroupSets(ImportReadGroupSetsRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/readgroupsets:import"
      body: "*"
    };
  }

  // Writes a read group set out as a BAM file in Google Cloud Storage.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Currently, an exported BAM file may differ in some ways from the original
  // BAM file as it was at import time. The caveats are listed under
  // [ImportReadGroupSets][google.genomics.v1.ReadServiceV1.ImportReadGroupSets].
  rpc ExportReadGroupSet(ExportReadGroupSetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/readgroupsets/{read_group_set_id}:export"
      body: "*"
    };
  }

  // Finds the read group sets that match the given criteria.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.searchReadGroupSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/readmethods.avdl#L135).
  rpc SearchReadGroupSets(SearchReadGroupSetsRequest)
      returns (SearchReadGroupSetsResponse) {
    option (google.api.http) = {
      post: "/v1/readgroupsets/search"
      body: "*"
    };
  }

  // Modifies a read group set. Patch semantics are supported by this method.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc UpdateReadGroupSet(UpdateReadGroupSetRequest) returns (ReadGroupSet) {
    option (google.api.http) = {
      patch: "/v1/readgroupsets/{read_group_set_id}"
      body: "read_group_set"
    };
  }

  // Removes a read group set.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc DeleteReadGroupSet(DeleteReadGroupSetRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/readgroupsets/{read_group_set_id}"
    };
  }

  // Fetches a single read group set by its ID.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc GetReadGroupSet(GetReadGroupSetRequest) returns (ReadGroupSet) {
    option (google.api.http) = {
      get: "/v1/readgroupsets/{read_group_set_id}"
    };
  }

  // Lists the fixed width coverage buckets of a read group set. Each bucket
  // corresponds to a range of a reference sequence and summarizes the
  // coverage information across that genomic range.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Coverage is the number of reads aligned to a given base in the reference
  // sequence. Buckets are available at several precomputed bucket widths,
  // which makes it possible to retrieve coverage at various 'zoom levels'.
  // READ permissions for the target read group set are required of the
  // caller.
  rpc ListCoverageBuckets(ListCoverageBucketsRequest)
      returns (ListCoverageBucketsResponse) {
    option (google.api.http) = {
      get: "/v1/readgroupsets/{read_group_set_id}/coveragebuckets"
    };
  }

  // Returns a list of reads for one or more read group sets.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // The search operates over a genomic coordinate space of reference sequence
  // & position, defined over the reference sequences to which the requested
  // read group sets are aligned.
  //
  // When a target positional range is given, the search returns every read
  // whose alignment to the reference genome overlaps the range. A query that
  // gives only read group set IDs yields all reads in those read group sets,
  // unmapped reads included.
  //
  // All returned reads (including those on subsequent pages) are ordered by
  // genomic coordinate: by reference sequence, then by position. The order
  // among reads with equivalent genomic coordinates is unspecified but
  // consistent, so two queries for the same content (regardless of page
  // size) yield reads in the same order across their respective streams of
  // paginated responses.
  //
  // Implements
  // [GlobalAllianceApi.searchReads](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/readmethods.avdl#L85).
  rpc SearchReads(SearchReadsRequest) returns (SearchReadsResponse) {
    option (google.api.http) = {
      post: "/v1/reads/search"
      body: "*"
    };
  }
}
// Request message for searching read group sets.
message SearchReadGroupSetsRequest {
  // Limits the query to read group sets contained in the given datasets. At
  // least one ID is required.
  repeated string dataset_ids = 1;

  // Only read group sets whose name has a substring matching this string are
  // returned.
  string name = 3;

  // Continuation token used to page through large result sets. To fetch the
  // next page of results, set this to the `nextPageToken` value from the
  // previous response.
  string page_token = 2;

  // Maximum number of results returned in a single page. Defaults to 256 when
  // unspecified. The maximum value is 1024.
  int32 page_size = 4;
}
// Response message for searching read group sets.
message SearchReadGroupSetsResponse {
  // The read group sets that matched the search.
  repeated ReadGroupSet read_group_sets = 1;

  // Continuation token used to page through large result sets. Pass this
  // value in a subsequent request to get the next page of results. Empty
  // when there are no additional results.
  string next_page_token = 2;
}
// Request message for importing read group sets.
message ImportReadGroupSetsRequest {
  // How imported read groups are partitioned into read group sets.
  enum PartitionStrategy {
    // No strategy was specified. The PER_FILE_PER_SAMPLE documentation
    // describes that strategy as the default behavior.
    PARTITION_STRATEGY_UNSPECIFIED = 0;

    // The default behavior. In most cases this strategy yields one read group
    // set per file.
    //
    // One read group set is allocated per file per sample. For BAM files,
    // read groups are considered to share a sample when their sample names
    // are identical. In addition, all reads of a file that do not belong to a
    // read group, if any, are grouped into a single read group set per file.
    PER_FILE_PER_SAMPLE = 1;

    // Places every read group from every imported file into a single read
    // group set. The headers of all imported files must be equivalent. All
    // reads that do not belong to a read group, if any, are grouped into a
    // separate read group set.
    MERGE_ALL = 2;
  }

  // Required. ID of the dataset the imported read group sets will belong to.
  // WRITE permissions to this dataset are required of the caller.
  string dataset_id = 1;

  // The reference set, if any, to which the imported read group sets are
  // aligned. Its reference names must be a superset of those found in the
  // imported file headers. When no reference set ID is provided, a best
  // effort is made to associate with a matching reference set.
  string reference_set_id = 4;

  // URIs that point at [BAM
  // files](https://samtools.github.io/hts-specs/SAMv1.pdf)
  // in Google Cloud Storage.
  // Wildcards (*) are allowed in these URIs, but matching files must not be
  // added or removed before the import has completed.
  //
  // Note that Google Cloud Storage object listing is only eventually
  // consistent: newly added files may not be immediately visible to
  // everyone. When using a wildcard it is therefore preferable not to start
  // the import immediately after the files are created.
  repeated string source_uris = 2;

  // Describes how read groups are partitioned into read group sets.
  PartitionStrategy partition_strategy = 5;
}
// Response message for importing read group sets.
message ImportReadGroupSetsResponse {
  // The IDs of the read group sets created by the import.
  repeated string read_group_set_ids = 1;
}
// Request message for exporting a read group set.
message ExportReadGroupSetRequest {
  // Required. ID of the Google Cloud project that owns this export. WRITE
  // access to this project is required of the caller.
  string project_id = 1;

  // Required. Google Cloud Storage URI of the exported BAM file. The
  // currently authenticated user needs write access to the new file. If the
  // URI already contains data, an error is returned.
  string export_uri = 2;

  // Required. ID of the read group set to export. READ access to this read
  // group set is required of the caller.
  string read_group_set_id = 3;

  // Names of the references to export. When not specified, all reference
  // sequences are exported, unmapped reads included.
  // Use `*` to export only unmapped reads.
  repeated string reference_names = 4;
}
// Request message for the ReadServiceV1.UpdateReadGroupSet method.
message UpdateReadGroupSetRequest {
  // ID of the read group set to update. WRITE permissions to the dataset
  // associated with this read group set are required of the caller.
  string read_group_set_id = 1;

  // The new data for the read group set. Field mutability is described under
  // `updateMask`.
  ReadGroupSet read_group_set = 2;

  // Optional mask that selects the fields to update. Supported fields:
  //
  // * [name][google.genomics.v1.ReadGroupSet.name].
  // * [referenceSetId][google.genomics.v1.ReadGroupSet.reference_set_id].
  //
  // An unset `updateMask` is equivalent to specifying all mutable fields.
  google.protobuf.FieldMask update_mask = 3;
}
// Request message for the ReadServiceV1.DeleteReadGroupSet method.
message DeleteReadGroupSetRequest {
  // ID of the read group set to delete. WRITE permissions to the dataset
  // associated with this read group set are required of the caller.
  string read_group_set_id = 1;
}
// Request message for the ReadServiceV1.GetReadGroupSet method.
message GetReadGroupSetRequest {
  // ID of the read group set.
  string read_group_set_id = 1;
}
// Request message for the ReadServiceV1.ListCoverageBuckets method.
message ListCoverageBucketsRequest {
  // Required. ID of the read group set over which coverage is requested.
  string read_group_set_id = 1;

  // Optional. Name of the reference to query, within the reference set
  // associated with this query.
  string reference_name = 3;

  // Start position of the range on the reference, 0-based inclusive.
  // `referenceName` must also be specified when this is specified. Defaults
  // to 0.
  int64 start = 4;

  // End position of the range on the reference, 0-based exclusive.
  // `referenceName` must also be specified when this is specified. When
  // unset or 0, defaults to the length of the reference.
  int64 end = 5;

  // Desired width, in base pairs, of each reported coverage bucket. The
  // value is rounded down to the nearest precomputed bucket width, and the
  // resulting width is returned as `bucketWidth` in the response. Defaults
  // to infinity (each bucket spans an entire reference sequence) or to the
  // length of the target range, if specified. The smallest precomputed
  // `bucketWidth` is currently 2048 base pairs; this is subject to
  // change.
  int64 target_bucket_width = 6;

  // Continuation token used to page through large result sets. To fetch the
  // next page of results, set this to the `nextPageToken` value from the
  // previous response.
  string page_token = 7;

  // Maximum number of results returned in a single page. Defaults to 1024
  // when unspecified. The maximum value is 2048.
  int32 page_size = 8;
}
// A bucket for which read coverage has been precomputed. Each bucket
// corresponds to a specific range of the reference sequence.
message CoverageBucket {
  // Genomic coordinate range that this bucket spans.
  Range range = 1;

  // Average number of reads aligned to each individual reference base in
  // this bucket.
  float mean_coverage = 2;
}
// Response message for the ReadServiceV1.ListCoverageBuckets method.
message ListCoverageBucketsResponse {
  // Length of each coverage bucket in base pairs. Buckets at the end of a
  // reference sequence may be shorter. The value is omitted when the bucket
  // width is infinity (the default behaviour, with no range or
  // `targetBucketWidth`).
  int64 bucket_width = 1;

  // The coverage buckets. The list is sparse: a bucket with 0 overlapping
  // reads is not returned. No bucket crosses more than one reference
  // sequence. Every bucket has width `bucketWidth`, unless its end is the
  // end of the reference sequence.
  repeated CoverageBucket coverage_buckets = 2;

  // Continuation token used to page through large result sets. Pass this
  // value in a subsequent request to get the next page of results. Empty
  // when there are no additional results.
  string next_page_token = 3;
}
// Request message for searching reads.
message SearchReadsRequest {
  // IDs of the read group sets within which to search for reads. All
  // specified read group sets must be aligned against a common set of
  // reference sequences; this defines the genomic coordinates for the query.
  // Must specify one of `readGroupSetIds` or `readGroupIds`.
  repeated string read_group_set_ids = 1;

  // IDs of the read groups within which to search for reads. All specified
  // read groups must belong to the same read group sets. Must specify one of
  // `readGroupSetIds` or `readGroupIds`.
  repeated string read_group_ids = 5;

  // Reference sequence name, for example `chr1`, `1`, or `chrX`. When set to
  // `*`, only unmapped reads are returned. When unspecified, all reads
  // (mapped and unmapped) are returned.
  string reference_name = 7;

  // Start position of the range on the reference, 0-based inclusive.
  // `referenceName` must also be specified when this is specified.
  int64 start = 8;

  // End position of the range on the reference, 0-based exclusive.
  // `referenceName` must also be specified when this is specified.
  int64 end = 9;

  // Continuation token used to page through large result sets. To fetch the
  // next page of results, set this to the `nextPageToken` value from the
  // previous response.
  string page_token = 3;

  // Maximum number of results returned in a single page. Defaults to 256 when
  // unspecified. The maximum value is 2048.
  int32 page_size = 4;
}
// Response message for searching reads.
message SearchReadsResponse {
  // The matching alignments, sorted by mapped genomic coordinate, if any,
  // and ascending in position within the same reference. Unmapped reads have
  // no position; they are returned contiguously, sorted in ascending
  // lexicographic order by fragment name.
  repeated Read alignments = 1;

  // Continuation token used to page through large result sets. Pass this
  // value in a subsequent request to get the next page of results. Empty
  // when there are no additional results.
  string next_page_token = 2;
}
// Request message for streaming reads.
message StreamReadsRequest {
  // Required. ID of the Google Cloud project that will be billed for this
  // access. WRITE access to this project is required of the caller.
  string project_id = 1;

  // ID of the read group set from which reads are streamed.
  string read_group_set_id = 2;

  // Reference sequence name, for example `chr1`, `1`, or `chrX`. When set to
  // *, only unmapped reads are returned.
  string reference_name = 3;

  // Start position of the range on the reference, 0-based inclusive.
  // `referenceName` must also be specified when this is specified.
  int64 start = 4;

  // End position of the range on the reference, 0-based exclusive.
  // `referenceName` must also be specified when this is specified.
  int64 end = 5;

  // Limits the results to one shard, which holds approximately
  // `1/totalShards` of the normal response payload for this query. The
  // results of a sharded request are disjoint from those returned by all
  // queries that differ only in their shard parameter. A shard may yield 0
  // results; this is especially likely for large values of `totalShards`.
  //
  // Valid values are `[0, totalShards)`.
  int32 shard = 6;

  // When `totalShards` is specified, each query with a unique `shard`
  // parameter returns a disjoint subset of the normal response payload. A
  // best effort is made to yield equally sized shards. Sharding can be used
  // to distribute processing amongst workers: each worker is assigned a
  // unique `shard` number and all workers specify the same `totalShards`
  // number. The union of the reads returned for all sharded queries
  // `[0, totalShards)` equals the reads returned by a single unsharded query.
  //
  // Queries for different values of `totalShards` with common divisors share
  // shard boundaries. For example, streaming `shard` 2 of 5 `totalShards`
  // yields the same results as streaming `shard`s 4 and 5 of 10
  // `totalShards`. This property can be leveraged for adaptive retries.
  int32 total_shards = 7;
}
// Response message streamed back by StreamingReadService.StreamReads.
message StreamReadsResponse {
  // Reads carried by this message of the response stream.
  repeated Read alignments = 1;
}
|