// File: data_stats.proto
  // Copyright 2020 Google LLC
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
  // You may obtain a copy of the License at
  //
  //     http://www.apache.org/licenses/LICENSE-2.0
  //
  // Unless required by applicable law or agreed to in writing, software
  // distributed under the License is distributed on an "AS IS" BASIS,
  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  // See the License for the specific language governing permissions and
  // limitations under the License.
  syntax = "proto3";

  package google.cloud.automl.v1beta1;

  import "google/api/annotations.proto";

  // Language-specific code generation options.
  option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1beta1;automl";
  option java_multiple_files = true;
  option java_package = "com.google.cloud.automl.v1beta1";
  option php_namespace = "Google\\Cloud\\AutoMl\\V1beta1";
  option ruby_package = "Google::Cloud::AutoML::V1beta1";
  // The data statistics of a series of values that share the same DataType.
  message DataStats {
    // The data statistics specific to a DataType. At most one of these fields
    // is set.
    oneof stats {
      // The statistics for FLOAT64 DataType.
      Float64Stats float64_stats = 3;

      // The statistics for STRING DataType.
      StringStats string_stats = 4;

      // The statistics for TIMESTAMP DataType.
      TimestampStats timestamp_stats = 5;

      // The statistics for ARRAY DataType.
      ArrayStats array_stats = 6;

      // The statistics for STRUCT DataType.
      StructStats struct_stats = 7;

      // The statistics for CATEGORY DataType.
      CategoryStats category_stats = 8;
    }

    // The number of distinct values.
    int64 distinct_value_count = 1;

    // The number of values that are null.
    int64 null_value_count = 2;

    // The number of values that are valid.
    int64 valid_value_count = 9;
  }
  // The data statistics of a series of FLOAT64 values.
  message Float64Stats {
    // A bucket of a histogram.
    message HistogramBucket {
      // The minimum value of the bucket, inclusive.
      double min = 1;

      // The maximum value of the bucket, exclusive unless max = `"Infinity"`,
      // in which case it's inclusive.
      double max = 2;

      // The number of data values that are in the bucket, i.e. are between
      // min and max values.
      int64 count = 3;
    }

    // The mean of the series.
    double mean = 1;

    // The standard deviation of the series.
    double standard_deviation = 2;

    // The k-quantile values of the data series of n values, ordered from
    // index 0 to index k. The value at index i is, approximately, the
    // i*n/k-th smallest value in the series; for i = 0 and i = k these are,
    // respectively, the min and max values.
    repeated double quantiles = 3;

    // Histogram buckets of the data series, sorted in ascending order by the
    // min value of the bucket. The number of buckets is dynamically
    // generated. The buckets are non-overlapping and completely cover the
    // whole FLOAT64 range, with the min of the first bucket being
    // `"-Infinity"` and the max of the last one being `"Infinity"`.
    repeated HistogramBucket histogram_buckets = 4;
  }
  // The data statistics of a series of STRING values.
  message StringStats {
    // The statistics of a unigram.
    message UnigramStats {
      // The unigram.
      string value = 1;

      // The number of occurrences of this unigram in the series.
      int64 count = 2;
    }

    // The statistics of the top 20 unigrams, ordered by
    // [count][google.cloud.automl.v1beta1.StringStats.UnigramStats.count].
    repeated UnigramStats top_unigram_stats = 1;
  }
  // The data statistics of a series of TIMESTAMP values.
  message TimestampStats {
    // Stats split by a granularity that is defined by the context.
    message GranularStats {
      // A map from granularity key to example count for that key
      // (e.g. for hour_of_day `13` means 1pm, or for month_of_year `5` means
      // May).
      map<int32, int64> buckets = 1;
    }

    // The string key is the pre-defined granularity. Currently supported:
    // hour_of_day, day_of_week, month_of_year.
    // Granularities finer than the granularity of the timestamp data are not
    // populated (e.g. if timestamps are at day granularity, then hour_of_day
    // is not populated).
    map<string, GranularStats> granular_stats = 1;
  }
  // The data statistics of a series of ARRAY values.
  message ArrayStats {
    // Stats of all the values of all arrays, as if they were a single long
    // series of data. The type depends on the element type of the array.
    DataStats member_stats = 2;
  }
  // The data statistics of a series of STRUCT values.
  message StructStats {
    // Map from a field name of the struct to data stats aggregated over the
    // series of all data in that field across all the structs.
    map<string, DataStats> field_stats = 1;
  }
  // The data statistics of a series of CATEGORY values.
  message CategoryStats {
    // The statistics of a single CATEGORY value.
    message SingleCategoryStats {
      // The CATEGORY value.
      string value = 1;

      // The number of occurrences of this value in the series.
      int64 count = 2;
    }

    // The statistics of the top 20 CATEGORY values, ordered by
    // [count][google.cloud.automl.v1beta1.CategoryStats.SingleCategoryStats.count].
    repeated SingleCategoryStats top_category_stats = 1;
  }
  // Correlation statistics between two series of DataType values. The two
  // series may have different DataTypes, but within a single series the
  // DataType must be the same.
  message CorrelationStats {
    // The correlation value using the Cramer's V measure.
    double cramers_v = 1;
  }