  // data.proto
  // Copyright 2019 Google LLC.
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
  // You may obtain a copy of the License at
  //
  //     http://www.apache.org/licenses/LICENSE-2.0
  //
  // Unless required by applicable law or agreed to in writing, software
  // distributed under the License is distributed on an "AS IS" BASIS,
  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  // See the License for the specific language governing permissions and
  // limitations under the License.
  //

  // This file uses proto3 syntax.
  syntax = "proto3";

  package google.bigtable.v2;

  // Namespaces, packages and class names used by the per-language generators.
  option csharp_namespace = "Google.Cloud.Bigtable.V2";
  option go_package = "google.golang.org/genproto/googleapis/bigtable/v2;bigtable";
  option java_multiple_files = true;
  option java_outer_classname = "DataProto";
  option java_package = "com.google.bigtable.v2";
  option php_namespace = "Google\\Cloud\\Bigtable\\V2";
  option ruby_package = "Google::Cloud::Bigtable::V2";

  // The complete (requested) contents of one row of a table.
  // A row that exceeds 256MiB in size cannot be read in full.
  message Row {
    // Key that uniquely identifies this row within its table. It is the same
    // key used to refer to the row in, for example, a MutateRowRequest.
    // Any non-empty byte string of up to 4KiB in length is allowed.
    bytes key = 1;

    // The row's column families. May be empty, but only when the entire row
    // is empty. The families appear in no specified mutual order.
    repeated Family families = 2;
  }

  // (Some of) the contents of the intersection of a single row and a single
  // column family of a table.
  message Family {
    // Key that uniquely identifies this family within its row. It is the same
    // key used to refer to the family in, for example, a RowFilter which sets
    // its `family_name_regex_filter` field.
    // Must match `[-_.a-zA-Z0-9]+`; the one exception is that
    // AggregatingRowProcessors may produce cells in a sentinel family with an
    // empty name.
    // Must be no greater than 64 characters in length.
    string name = 1;

    // The family's columns. Must not be empty. Sorted in order of increasing
    // `qualifier`.
    repeated Column columns = 2;
  }

  // (Some of) the contents of the intersection of a single row and a single
  // column of a table.
  message Column {
    // Key that uniquely identifies this column within its family. It is the
    // same key used to refer to the column in, for example, a RowFilter which
    // sets its `column_qualifier_regex_filter` field.
    // Any byte string of up to 16KiB in length is allowed, including the empty
    // string.
    bytes qualifier = 1;

    // The column's cells. Must not be empty. Sorted in order of decreasing
    // `timestamp_micros`.
    repeated Cell cells = 2;
  }

  // (Some of) the contents of a single row/column/timestamp of a table.
  message Cell {
    // Timestamp stored with the cell; it also identifies the cell uniquely
    // within its column.
    // The value is always expressed in microseconds, although an individual
    // table may set a coarser granularity that further restricts the allowed
    // values. For example, a table which specifies millisecond granularity
    // only allows values of `timestamp_micros` which are multiples of 1000.
    int64 timestamp_micros = 1;

    // Value stored in the cell.
    // Any byte string of up to 100MiB in length is allowed, including the
    // empty string.
    bytes value = 2;

    // Labels that a [RowFilter][google.bigtable.v2.RowFilter] applied to the
    // cell.
    repeated string labels = 3;
  }

  // A contiguous range of rows.
  message RowRange {
    // Row key at which the range starts.
    // When neither field is set, interpreted as the empty string, inclusive.
    oneof start_key {
      // Inclusive lower bound of the range.
      bytes start_key_closed = 1;

      // Exclusive lower bound of the range.
      bytes start_key_open = 2;
    }

    // Row key at which the range ends.
    // When neither field is set, interpreted as the infinite row key,
    // exclusive.
    oneof end_key {
      // Exclusive upper bound of the range.
      bytes end_key_open = 3;

      // Inclusive upper bound of the range.
      bytes end_key_closed = 4;
    }
  }

  // A set of rows that need not be contiguous.
  message RowSet {
    // Keys of the single rows included in the set.
    repeated bytes row_keys = 1;

    // Contiguous row ranges included in the set.
    repeated RowRange row_ranges = 2;
  }

  // A contiguous range of columns within a single column family.
  // The range spans from <family_name>:<start_qualifier> to
  // <family_name>:<end_qualifier>, and each bound can be either inclusive or
  // exclusive.
  message ColumnRange {
    // Name of the column family within which this range falls.
    string family_name = 1;

    // Column qualifier at which the range starts (within `family_name`).
    // When neither field is set, interpreted as the empty string, inclusive.
    oneof start_qualifier {
      // Inclusive lower bound of the range.
      bytes start_qualifier_closed = 2;

      // Exclusive lower bound of the range.
      bytes start_qualifier_open = 3;
    }

    // Column qualifier at which the range ends (within `family_name`).
    // When neither field is set, interpreted as the infinite string,
    // exclusive.
    oneof end_qualifier {
      // Inclusive upper bound of the range.
      bytes end_qualifier_closed = 4;

      // Exclusive upper bound of the range.
      bytes end_qualifier_open = 5;
    }
  }

  // Specifies a contiguous range of microsecond timestamps.
  message TimestampRange {
    // Inclusive lower bound. When left empty, interpreted as 0.
    int64 start_timestamp_micros = 1;

    // Exclusive upper bound. When left empty, interpreted as infinity.
    int64 end_timestamp_micros = 2;
  }

  // A contiguous range of raw byte values.
  message ValueRange {
    // Value at which the range starts.
    // When neither field is set, interpreted as the empty string, inclusive.
    oneof start_value {
      // Inclusive lower bound of the range.
      bytes start_value_closed = 1;

      // Exclusive lower bound of the range.
      bytes start_value_open = 2;
    }

    // Value at which the range ends.
    // When neither field is set, interpreted as the infinite string,
    // exclusive.
    oneof end_value {
      // Inclusive upper bound of the range.
      bytes end_value_closed = 3;

      // Exclusive upper bound of the range.
      bytes end_value_open = 4;
    }
  }

  // Takes a row as input and produces an alternate view of that row according
  // to the specified rules. A RowFilter might, for example, trim a row down to
  // just the cells from columns matching a given regular expression, or return
  // all the cells of a row but not their values. More complicated filters can
  // be composed out of these components to express requests such as, "within
  // every column of a particular family, give just the two most recent cells
  // which are older than timestamp X."
  //
  // There are two broad categories of RowFilters (true filters and
  // transformers), as well as two ways to compose simple filters into more
  // complex ones (chains and interleaves). They work as follows:
  //
  // * True filters alter the input row by excluding some of its cells
  //   wholesale from the output row. An example of a true filter is the
  //   `value_regex_filter`, which excludes cells whose values don't match the
  //   specified pattern. All regex true filters use RE2 syntax
  //   (https://github.com/google/re2/wiki/Syntax) in raw byte mode
  //   (RE2::Latin1), and are evaluated as full matches. An important point to
  //   keep in mind is that `RE2(.)` is equivalent by default to `RE2([^\n])`,
  //   meaning that it does not match newlines. When attempting to match an
  //   arbitrary byte, you should therefore use the escape sequence `\C`, which
  //   may need to be further escaped as `\\C` in your client language.
  //
  // * Transformers alter the input row by changing the values of some of its
  //   cells in the output, without excluding them completely. Currently, the
  //   only supported transformer is the `strip_value_transformer`, which
  //   replaces every cell's value with the empty string.
  //
  // * Chains and interleaves are described in more detail in the
  //   RowFilter.Chain and RowFilter.Interleave documentation.
  //
  // The total serialized size of a RowFilter message must not exceed 4096
  // bytes, and RowFilters may not be nested within each other (in Chains or
  // Interleaves) to a depth of more than 20.
  message RowFilter {
    // A RowFilter which sends rows through several RowFilters in sequence.
    message Chain {
      // The elements of `filters` are chained together to process the input
      // row:
      //
      //     in row -> f(0) -> intermediate row -> f(1) -> ... -> f(N) -> out row
      //
      // The full chain is executed atomically.
      repeated RowFilter filters = 1;
    }

    // A RowFilter which sends each row to each of several component
    // RowFilters and interleaves the results.
    message Interleave {
      // The elements of `filters` all process a copy of the input row, and the
      // results are pooled, sorted, and combined into a single output row.
      // If multiple cells are produced with the same column and timestamp,
      // they will all appear in the output row in an unspecified mutual order.
      // Consider the following example, with three filters:
      //
      //                            input row
      //                                |
      //          ---------------------------------------------
      //          |                     |                     |
      //         f(0)                  f(1)                  f(2)
      //          |                     |                     |
      // 1: foo,bar,10,x          foo,bar,10,z           far,bar,7,a
      // 2: foo,blah,11,z         far,blah,5,x           far,blah,5,x
      //          |                     |                     |
      //          ---------------------------------------------
      //                                |
      // 1:                       foo,bar,10,z  // could have switched with #2
      // 2:                       foo,bar,10,x  // could have switched with #1
      // 3:                       foo,blah,11,z
      // 4:                       far,bar,7,a
      // 5:                       far,blah,5,x  // identical to #6
      // 6:                       far,blah,5,x  // identical to #5
      //
      // All interleaved filters are executed atomically.
      repeated RowFilter filters = 1;
    }

    // A RowFilter which evaluates one of two possible RowFilters, depending on
    // whether or not a predicate RowFilter outputs any cells from the input
    // row.
    //
    // IMPORTANT NOTE: The predicate filter does not execute atomically with
    // the true and false filters, which may lead to inconsistent or unexpected
    // results. Additionally, Condition filters have poor performance,
    // especially when filters are set for the false condition.
    message Condition {
      // If `predicate_filter` outputs any cells, then `true_filter` will be
      // evaluated on the input row. Otherwise, `false_filter` will be
      // evaluated.
      RowFilter predicate_filter = 1;

      // Filter applied to the input row if `predicate_filter` returns any
      // results. If not provided, no results will be returned in the true
      // case.
      RowFilter true_filter = 2;

      // Filter applied to the input row if `predicate_filter` does not return
      // any results. If not provided, no results will be returned in the
      // false case.
      RowFilter false_filter = 3;
    }

    // Which of the possible RowFilter types to apply. If none are set, this
    // RowFilter returns all cells in the input row.
    oneof filter {
      // Applies several RowFilters to the data in sequence, progressively
      // narrowing the results.
      Chain chain = 1;

      // Applies several RowFilters to the data in parallel and combines the
      // results.
      Interleave interleave = 2;

      // Applies one of two possible RowFilters to the data based on the
      // output of a predicate RowFilter.
      Condition condition = 3;

      // ADVANCED USE ONLY.
      // Hook for introspection into the RowFilter. Outputs all cells directly
      // to the output of the read rather than to any parent filter. Consider
      // the following example:
      //
      //     Chain(
      //       FamilyRegex("A"),
      //       Interleave(
      //         All(),
      //         Chain(Label("foo"), Sink())
      //       ),
      //       QualifierRegex("B")
      //     )
      //
      //                     A,A,1,w
      //                     A,B,2,x
      //                     B,B,4,z
      //                        |
      //                 FamilyRegex("A")
      //                        |
      //                     A,A,1,w
      //                     A,B,2,x
      //                        |
      //           +------------+-------------+
      //           |                          |
      //         All()                    Label(foo)
      //           |                          |
      //        A,A,1,w              A,A,1,w,labels:[foo]
      //        A,B,2,x              A,B,2,x,labels:[foo]
      //           |                          |
      //           |                        Sink() --------------+
      //           |                          |                  |
      //           +------------+      x------+          A,A,1,w,labels:[foo]
      //                        |                        A,B,2,x,labels:[foo]
      //                     A,A,1,w                             |
      //                     A,B,2,x                             |
      //                        |                                |
      //                QualifierRegex("B")                      |
      //                        |                                |
      //                     A,B,2,x                             |
      //                        |                                |
      //                        +--------------------------------+
      //                        |
      //                     A,A,1,w,labels:[foo]
      //                     A,B,2,x,labels:[foo]  // could be switched
      //                     A,B,2,x               // could be switched
      //
      // Despite being excluded by the qualifier filter, a copy of every cell
      // that reaches the sink is present in the final result.
      //
      // As with an [Interleave][google.bigtable.v2.RowFilter.Interleave],
      // duplicate cells are possible, and appear in an unspecified mutual
      // order. In this case we have a duplicate with column "A:B" and
      // timestamp 2, because one copy passed through the all filter while the
      // other was passed through the label and sink. Note that one copy has
      // label "foo", while the other does not.
      //
      // Cannot be used within the `predicate_filter`, `true_filter`, or
      // `false_filter` of a
      // [Condition][google.bigtable.v2.RowFilter.Condition].
      bool sink = 16;

      // Matches all cells, regardless of input. Functionally equivalent to
      // leaving `filter` unset, but included for completeness.
      bool pass_all_filter = 17;

      // Does not match any cells, regardless of input. Useful for temporarily
      // disabling just part of a filter.
      bool block_all_filter = 18;

      // Matches only cells from rows whose keys satisfy the given RE2 regex.
      // In other words, passes through the entire row when the key matches,
      // and otherwise produces an empty row.
      // Note that, since row keys can contain arbitrary bytes, the `\C` escape
      // sequence must be used if a true wildcard is desired. The `.` character
      // will not match the new line character `\n`, which may be present in a
      // binary key.
      bytes row_key_regex_filter = 4;

      // Matches all cells from a row with probability p, and matches no cells
      // from the row with probability 1-p.
      double row_sample_filter = 14;

      // Matches only cells from columns whose families satisfy the given RE2
      // regex. For technical reasons, the regex must not contain the `:`
      // character, even if it is not being used as a literal.
      // Note that, since column families cannot contain the new line
      // character `\n`, it is sufficient to use `.` as a full wildcard when
      // matching column family names.
      string family_name_regex_filter = 5;

      // Matches only cells from columns whose qualifiers satisfy the given RE2
      // regex.
      // Note that, since column qualifiers can contain arbitrary bytes, the
      // `\C` escape sequence must be used if a true wildcard is desired. The
      // `.` character will not match the new line character `\n`, which may be
      // present in a binary qualifier.
      bytes column_qualifier_regex_filter = 6;

      // Matches only cells from columns within the given range.
      ColumnRange column_range_filter = 7;

      // Matches only cells with timestamps within the given range.
      TimestampRange timestamp_range_filter = 8;

      // Matches only cells with values that satisfy the given regular
      // expression.
      // Note that, since cell values can contain arbitrary bytes, the `\C`
      // escape sequence must be used if a true wildcard is desired. The `.`
      // character will not match the new line character `\n`, which may be
      // present in a binary value.
      bytes value_regex_filter = 9;

      // Matches only cells with values that fall within the given range.
      ValueRange value_range_filter = 15;

      // Skips the first N cells of each row, matching all subsequent cells.
      // If duplicate cells are present, as is possible when using an
      // Interleave, each copy of the cell is counted separately.
      int32 cells_per_row_offset_filter = 10;

      // Matches only the first N cells of each row.
      // If duplicate cells are present, as is possible when using an
      // Interleave, each copy of the cell is counted separately.
      int32 cells_per_row_limit_filter = 11;

      // Matches only the most recent N cells within each column. For example,
      // if N=2, this filter would match column `foo:bar` at timestamps 10 and
      // 9, skip all earlier cells in `foo:bar`, and then begin matching again
      // in column `foo:bar2`.
      // If duplicate cells are present, as is possible when using an
      // Interleave, each copy of the cell is counted separately.
      int32 cells_per_column_limit_filter = 12;

      // Replaces each cell's value with the empty string.
      bool strip_value_transformer = 13;

      // Applies the given label to all cells in the output row. This allows
      // the client to determine which results were produced from which part
      // of the filter.
      //
      // Values must be at most 15 characters in length, and match the RE2
      // pattern `[a-z0-9\\-]+`
      //
      // Due to a technical limitation, it is not currently possible to apply
      // multiple labels to a cell. As a result, a Chain may have no more than
      // one sub-filter which contains an `apply_label_transformer`. It is
      // okay for an Interleave to contain multiple
      // `apply_label_transformers`, as they will be applied to separate
      // copies of the input. This may be relaxed in the future.
      string apply_label_transformer = 19;
    }
  }

  // A particular change to be made to the contents of a row.
  message Mutation {
    // A Mutation which sets the value of the specified cell.
    message SetCell {
      // Name of the family into which new data should be written.
      // Must match `[-_.a-zA-Z0-9]+`
      string family_name = 1;

      // Qualifier of the column into which new data should be written.
      // Can be any byte string, including the empty string.
      bytes column_qualifier = 2;

      // Timestamp of the cell into which new data should be written.
      // Use -1 for current Bigtable server time.
      // Otherwise, the client should set this value itself, noting that the
      // default value is a timestamp of zero if the field is left
      // unspecified.
      // Values must match the granularity of the table (e.g. micros, millis).
      int64 timestamp_micros = 3;

      // Value to be written into the specified cell.
      bytes value = 4;
    }

    // A Mutation which deletes cells from the specified column, optionally
    // restricting the deletions to a given timestamp range.
    message DeleteFromColumn {
      // Name of the family from which cells should be deleted.
      // Must match `[-_.a-zA-Z0-9]+`
      string family_name = 1;

      // Qualifier of the column from which cells should be deleted.
      // Can be any byte string, including the empty string.
      bytes column_qualifier = 2;

      // Range of timestamps within which cells should be deleted.
      TimestampRange time_range = 3;
    }

    // A Mutation which deletes all cells from the specified column family.
    message DeleteFromFamily {
      // Name of the family from which cells should be deleted.
      // Must match `[-_.a-zA-Z0-9]+`
      string family_name = 1;
    }

    // A Mutation which deletes all cells from the containing row.
    message DeleteFromRow {}

    // Which of the possible Mutation types to apply.
    oneof mutation {
      // Set a cell's value.
      SetCell set_cell = 1;

      // Deletes cells from a column.
      DeleteFromColumn delete_from_column = 2;

      // Deletes cells from a column family.
      DeleteFromFamily delete_from_family = 3;

      // Deletes cells from the entire row.
      DeleteFromRow delete_from_row = 4;
    }
  }

  // An atomic read/modify/write operation on the latest value of the
  // specified column.
  message ReadModifyWriteRule {
    // Name of the family to which the read/modify/write should be applied.
    // Must match `[-_.a-zA-Z0-9]+`
    string family_name = 1;

    // Qualifier of the column to which the read/modify/write should be
    // applied.
    // Can be any byte string, including the empty string.
    bytes column_qualifier = 2;

    // Rule used to determine the column's new latest value from its current
    // latest value.
    oneof rule {
      // Rule specifying that `append_value` be appended to the existing
      // value. If the targeted cell is unset, it will be treated as
      // containing the empty string.
      bytes append_value = 3;

      // Rule specifying that `increment_amount` be added to the existing
      // value. If the targeted cell is unset, it will be treated as
      // containing a zero. Otherwise, the targeted cell must contain an
      // 8-byte value (interpreted as a 64-bit big-endian signed integer), or
      // the entire request will fail.
      int64 increment_amount = 4;
    }
  }