// evaluation_job.proto
// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";

package google.cloud.datalabeling.v1beta1;

import "google/api/annotations.proto";
import "google/api/resource.proto";
import "google/cloud/datalabeling/v1beta1/dataset.proto";
import "google/cloud/datalabeling/v1beta1/evaluation.proto";
import "google/cloud/datalabeling/v1beta1/human_annotation_config.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";

option csharp_namespace = "Google.Cloud.DataLabeling.V1Beta1";
option go_package = "google.golang.org/genproto/googleapis/cloud/datalabeling/v1beta1;datalabeling";
option java_multiple_files = true;
option java_package = "com.google.cloud.datalabeling.v1beta1";
option php_namespace = "Google\\Cloud\\DataLabeling\\V1beta1";
option ruby_package = "Google::Cloud::DataLabeling::V1beta1";
  30. // Defines an evaluation job that runs periodically to generate
  31. // [Evaluations][google.cloud.datalabeling.v1beta1.Evaluation]. [Creating an evaluation
  32. // job](/ml-engine/docs/continuous-evaluation/create-job) is the starting point
  33. // for using continuous evaluation.
  34. message EvaluationJob {
  35. option (google.api.resource) = {
  36. type: "datalabeling.googleapis.com/EvaluationJob"
  37. pattern: "projects/{project}/evaluationJobs/{evaluation_job}"
  38. };
  39. // State of the job.
  40. enum State {
  41. STATE_UNSPECIFIED = 0;
  42. // The job is scheduled to run at the [configured interval][google.cloud.datalabeling.v1beta1.EvaluationJob.schedule]. You
  43. // can [pause][google.cloud.datalabeling.v1beta1.DataLabelingService.PauseEvaluationJob] or
  44. // [delete][google.cloud.datalabeling.v1beta1.DataLabelingService.DeleteEvaluationJob] the job.
  45. //
  46. // When the job is in this state, it samples prediction input and output
  47. // from your model version into your BigQuery table as predictions occur.
  48. SCHEDULED = 1;
  49. // The job is currently running. When the job runs, Data Labeling Service
  50. // does several things:
  51. //
  52. // 1. If you have configured your job to use Data Labeling Service for
  53. // ground truth labeling, the service creates a
  54. // [Dataset][google.cloud.datalabeling.v1beta1.Dataset] and a labeling task for all data sampled
  55. // since the last time the job ran. Human labelers provide ground truth
  56. // labels for your data. Human labeling may take hours, or even days,
  57. // depending on how much data has been sampled. The job remains in the
  58. // `RUNNING` state during this time, and it can even be running multiple
  59. // times in parallel if it gets triggered again (for example 24 hours
  60. // later) before the earlier run has completed. When human labelers have
  61. // finished labeling the data, the next step occurs.
  62. // <br><br>
  63. // If you have configured your job to provide your own ground truth
  64. // labels, Data Labeling Service still creates a [Dataset][google.cloud.datalabeling.v1beta1.Dataset] for newly
  65. // sampled data, but it expects that you have already added ground truth
  66. // labels to the BigQuery table by this time. The next step occurs
  67. // immediately.
  68. //
  69. // 2. Data Labeling Service creates an [Evaluation][google.cloud.datalabeling.v1beta1.Evaluation] by comparing your
  70. // model version's predictions with the ground truth labels.
  71. //
  72. // If the job remains in this state for a long time, it continues to sample
  73. // prediction data into your BigQuery table and will run again at the next
  74. // interval, even if it causes the job to run multiple times in parallel.
  75. RUNNING = 2;
  76. // The job is not sampling prediction input and output into your BigQuery
  77. // table and it will not run according to its schedule. You can
  78. // [resume][google.cloud.datalabeling.v1beta1.DataLabelingService.ResumeEvaluationJob] the job.
  79. PAUSED = 3;
  80. // The job has this state right before it is deleted.
  81. STOPPED = 4;
  82. }
  83. // Output only. After you create a job, Data Labeling Service assigns a name
  84. // to the job with the following format:
  85. //
  86. // "projects/<var>{project_id}</var>/evaluationJobs/<var>{evaluation_job_id}</var>"
  87. string name = 1;
  88. // Required. Description of the job. The description can be up to 25,000
  89. // characters long.
  90. string description = 2;
  91. // Output only. Describes the current state of the job.
  92. State state = 3;
  93. // Required. Describes the interval at which the job runs. This interval must
  94. // be at least 1 day, and it is rounded to the nearest day. For example, if
  95. // you specify a 50-hour interval, the job runs every 2 days.
  96. //
  97. // You can provide the schedule in
  98. // [crontab format](/scheduler/docs/configuring/cron-job-schedules) or in an
  99. // [English-like
  100. // format](/appengine/docs/standard/python/config/cronref#schedule_format).
  101. //
  102. // Regardless of what you specify, the job will run at 10:00 AM UTC. Only the
  103. // interval from this schedule is used, not the specific time of day.
  104. string schedule = 4;
  105. // Required. The [AI Platform Prediction model
  106. // version](/ml-engine/docs/prediction-overview) to be evaluated. Prediction
  107. // input and output is sampled from this model version. When creating an
  108. // evaluation job, specify the model version in the following format:
  109. //
  110. // "projects/<var>{project_id}</var>/models/<var>{model_name}</var>/versions/<var>{version_name}</var>"
  111. //
  112. // There can only be one evaluation job per model version.
  113. string model_version = 5;
  114. // Required. Configuration details for the evaluation job.
  115. EvaluationJobConfig evaluation_job_config = 6;
  116. // Required. Name of the [AnnotationSpecSet][google.cloud.datalabeling.v1beta1.AnnotationSpecSet] describing all the
  117. // labels that your machine learning model outputs. You must create this
  118. // resource before you create an evaluation job and provide its name in the
  119. // following format:
  120. //
  121. // "projects/<var>{project_id}</var>/annotationSpecSets/<var>{annotation_spec_set_id}</var>"
  122. string annotation_spec_set = 7;
  123. // Required. Whether you want Data Labeling Service to provide ground truth
  124. // labels for prediction input. If you want the service to assign human
  125. // labelers to annotate your data, set this to `true`. If you want to provide
  126. // your own ground truth labels in the evaluation job's BigQuery table, set
  127. // this to `false`.
  128. bool label_missing_ground_truth = 8;
  129. // Output only. Every time the evaluation job runs and an error occurs, the
  130. // failed attempt is appended to this array.
  131. repeated Attempt attempts = 9;
  132. // Output only. Timestamp of when this evaluation job was created.
  133. google.protobuf.Timestamp create_time = 10;
  134. }
  135. // Configures specific details of how a continuous evaluation job works. Provide
  136. // this configuration when you create an EvaluationJob.
  137. message EvaluationJobConfig {
  138. // Required. Details for how you want human reviewers to provide ground truth
  139. // labels.
  140. oneof human_annotation_request_config {
  141. // Specify this field if your model version performs image classification or
  142. // general classification.
  143. //
  144. // `annotationSpecSet` in this configuration must match
  145. // [EvaluationJob.annotationSpecSet][google.cloud.datalabeling.v1beta1.EvaluationJob.annotation_spec_set].
  146. // `allowMultiLabel` in this configuration must match
  147. // `classificationMetadata.isMultiLabel` in [input_config][google.cloud.datalabeling.v1beta1.EvaluationJobConfig.input_config].
  148. ImageClassificationConfig image_classification_config = 4;
  149. // Specify this field if your model version performs image object detection
  150. // (bounding box detection).
  151. //
  152. // `annotationSpecSet` in this configuration must match
  153. // [EvaluationJob.annotationSpecSet][google.cloud.datalabeling.v1beta1.EvaluationJob.annotation_spec_set].
  154. BoundingPolyConfig bounding_poly_config = 5;
  155. // Specify this field if your model version performs text classification.
  156. //
  157. // `annotationSpecSet` in this configuration must match
  158. // [EvaluationJob.annotationSpecSet][google.cloud.datalabeling.v1beta1.EvaluationJob.annotation_spec_set].
  159. // `allowMultiLabel` in this configuration must match
  160. // `classificationMetadata.isMultiLabel` in [input_config][google.cloud.datalabeling.v1beta1.EvaluationJobConfig.input_config].
  161. TextClassificationConfig text_classification_config = 8;
  162. }
  163. // Rquired. Details for the sampled prediction input. Within this
  164. // configuration, there are requirements for several fields:
  165. //
  166. // * `dataType` must be one of `IMAGE`, `TEXT`, or `GENERAL_DATA`.
  167. // * `annotationType` must be one of `IMAGE_CLASSIFICATION_ANNOTATION`,
  168. // `TEXT_CLASSIFICATION_ANNOTATION`, `GENERAL_CLASSIFICATION_ANNOTATION`,
  169. // or `IMAGE_BOUNDING_BOX_ANNOTATION` (image object detection).
  170. // * If your machine learning model performs classification, you must specify
  171. // `classificationMetadata.isMultiLabel`.
  172. // * You must specify `bigquerySource` (not `gcsSource`).
  173. InputConfig input_config = 1;
  174. // Required. Details for calculating evaluation metrics and creating
  175. // [Evaulations][google.cloud.datalabeling.v1beta1.Evaluation]. If your model version performs image object
  176. // detection, you must specify the `boundingBoxEvaluationOptions` field within
  177. // this configuration. Otherwise, provide an empty object for this
  178. // configuration.
  179. EvaluationConfig evaluation_config = 2;
  180. // Optional. Details for human annotation of your data. If you set
  181. // [labelMissingGroundTruth][google.cloud.datalabeling.v1beta1.EvaluationJob.label_missing_ground_truth] to
  182. // `true` for this evaluation job, then you must specify this field. If you
  183. // plan to provide your own ground truth labels, then omit this field.
  184. //
  185. // Note that you must create an [Instruction][google.cloud.datalabeling.v1beta1.Instruction] resource before you can
  186. // specify this field. Provide the name of the instruction resource in the
  187. // `instruction` field within this configuration.
  188. HumanAnnotationConfig human_annotation_config = 3;
  189. // Required. Prediction keys that tell Data Labeling Service where to find the
  190. // data for evaluation in your BigQuery table. When the service samples
  191. // prediction input and output from your model version and saves it to
  192. // BigQuery, the data gets stored as JSON strings in the BigQuery table. These
  193. // keys tell Data Labeling Service how to parse the JSON.
  194. //
  195. // You can provide the following entries in this field:
  196. //
  197. // * `data_json_key`: the data key for prediction input. You must provide
  198. // either this key or `reference_json_key`.
  199. // * `reference_json_key`: the data reference key for prediction input. You
  200. // must provide either this key or `data_json_key`.
  201. // * `label_json_key`: the label key for prediction output. Required.
  202. // * `label_score_json_key`: the score key for prediction output. Required.
  203. // * `bounding_box_json_key`: the bounding box key for prediction output.
  204. // Required if your model version perform image object detection.
  205. //
  206. // Learn [how to configure prediction
  207. // keys](/ml-engine/docs/continuous-evaluation/create-job#prediction-keys).
  208. map<string, string> bigquery_import_keys = 9;
  209. // Required. The maximum number of predictions to sample and save to BigQuery
  210. // during each [evaluation interval][google.cloud.datalabeling.v1beta1.EvaluationJob.schedule]. This limit
  211. // overrides `example_sample_percentage`: even if the service has not sampled
  212. // enough predictions to fulfill `example_sample_perecentage` during an
  213. // interval, it stops sampling predictions when it meets this limit.
  214. int32 example_count = 10;
  215. // Required. Fraction of predictions to sample and save to BigQuery during
  216. // each [evaluation interval][google.cloud.datalabeling.v1beta1.EvaluationJob.schedule]. For example, 0.1 means
  217. // 10% of predictions served by your model version get saved to BigQuery.
  218. double example_sample_percentage = 11;
  219. // Optional. Configuration details for evaluation job alerts. Specify this
  220. // field if you want to receive email alerts if the evaluation job finds that
  221. // your predictions have low mean average precision during a run.
  222. EvaluationJobAlertConfig evaluation_job_alert_config = 13;
  223. }
  224. // Provides details for how an evaluation job sends email alerts based on the
  225. // results of a run.
  226. message EvaluationJobAlertConfig {
  227. // Required. An email address to send alerts to.
  228. string email = 1;
  229. // Required. A number between 0 and 1 that describes a minimum mean average
  230. // precision threshold. When the evaluation job runs, if it calculates that
  231. // your model version's predictions from the recent interval have
  232. // [meanAveragePrecision][google.cloud.datalabeling.v1beta1.PrCurve.mean_average_precision] below this
  233. // threshold, then it sends an alert to your specified email.
  234. double min_acceptable_mean_average_precision = 2;
  235. }
  236. // Records a failed evaluation job run.
  237. message Attempt {
  238. google.protobuf.Timestamp attempt_time = 1;
  239. // Details of errors that occurred.
  240. repeated google.rpc.Status partial_failures = 2;
  241. }