dataset.proto 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348
  1. // Copyright 2019 Google LLC.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. //
  syntax = "proto3";

  package google.cloud.datalabeling.v1beta1;

  17. import "google/api/annotations.proto";
  18. import "google/api/resource.proto";
  19. import "google/cloud/datalabeling/v1beta1/annotation.proto";
  20. import "google/cloud/datalabeling/v1beta1/annotation_spec_set.proto";
  21. import "google/cloud/datalabeling/v1beta1/data_payloads.proto";
  22. import "google/cloud/datalabeling/v1beta1/human_annotation_config.proto";
  23. import "google/protobuf/timestamp.proto";

  // Code generation options for the individual target languages.
  option csharp_namespace = "Google.Cloud.DataLabeling.V1Beta1";
  option go_package = "google.golang.org/genproto/googleapis/cloud/datalabeling/v1beta1;datalabeling";
  option java_multiple_files = true;
  option java_package = "com.google.cloud.datalabeling.v1beta1";
  option php_namespace = "Google\\Cloud\\DataLabeling\\V1beta1";
  option ruby_package = "Google::Cloud::DataLabeling::V1beta1";

  // The kind of data described by an InputConfig (see its `data_type` field).
  //
  // NOTE(review): numbers 3 and 5 are not assigned in this enum. If they
  // belonged to values that were removed, consider marking them `reserved`;
  // confirm against the API history.
  enum DataType {
    // Default value; no data type has been specified.
    DATA_TYPE_UNSPECIFIED = 0;

    // Image data. Allowed for continuous evaluation.
    IMAGE = 1;

    // Video data.
    VIDEO = 2;

    // Text data. Allowed for continuous evaluation.
    TEXT = 4;

    // General data. Allowed for continuous evaluation.
    GENERAL_DATA = 6;
  }
  // A Dataset is the resource that holds your data. You can request multiple
  // labeling tasks for one dataset; each task generates an AnnotatedDataset.
  message Dataset {
    option (google.api.resource) = {
      type: "datalabeling.googleapis.com/Dataset"
      pattern: "projects/{project}/datasets/{dataset}"
    };

    // Output only. Resource name of the dataset, in the format:
    // projects/{project_id}/datasets/{dataset_id}
    string name = 1;

    // Required. The display name of the dataset. Maximum of 64 characters.
    string display_name = 2;

    // Optional. User-provided description of the dataset.
    // The description can be up to 10000 characters long.
    string description = 3;

    // Output only. Time the dataset was created.
    google.protobuf.Timestamp create_time = 4;

    // Output only. The original input configs that were supplied when
    // ImportData was called. Available only after clients have imported data
    // into this dataset.
    repeated InputConfig input_configs = 5;

    // Output only. The names of any related resources that are blocking
    // changes to the dataset.
    repeated string blocking_resources = 6;

    // Output only. The number of data items in the dataset.
    int64 data_item_count = 7;
  }
  // Configuration of the input data, including its data type, location, etc.
  message InputConfig {
    // Optional. Metadata associated with each data type.
    oneof data_type_metadata {
      // Required for text import, because a language code must be specified.
      TextMetadata text_metadata = 6;
    }

    // Required. Where the data comes from.
    oneof source {
      // Source located in Cloud Storage.
      GcsSource gcs_source = 2;

      // Source located in BigQuery. You must specify this field if you are
      // using this InputConfig in an
      // [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob].
      BigQuerySource bigquery_source = 5;
    }

    // Required. The data type must be specified when a user imports data.
    DataType data_type = 1;

    // Optional. The type of annotation to be performed on this data. You must
    // specify this field if you are using this InputConfig in an
    // [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob].
    AnnotationType annotation_type = 3;

    // Optional. Metadata about annotations for the input. You must specify
    // this field if you are using this InputConfig in an
    // [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob] for a
    // model version that performs classification.
    ClassificationMetadata classification_metadata = 4;
  }
  // Metadata for text data.
  message TextMetadata {
    // The language of this text, given as a
    // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language code.
    // Default value is en-US.
    string language_code = 1;
  }
  // Metadata for classification annotations.
  message ClassificationMetadata {
    // Whether the classification task is multi-label.
    bool is_multi_label = 1;
  }
  // A Cloud Storage file to be imported.
  message GcsSource {
    // Required. The input URI of the source file. This must be a Cloud Storage
    // path (`gs://...`).
    string input_uri = 1;

    // Required. The format of the source file. Only "text/csv" is supported.
    string mime_type = 2;
  }
  // The BigQuery location for input data. If used in an
  // [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob], this is
  // where the service saves the prediction input and output sampled from the
  // model version.
  message BigQuerySource {
    // Required. BigQuery URI to a table, up to 2,000 characters long.
    //
    // If you specify the URI of a table that does not exist, Data Labeling
    // Service creates a table at the URI with the correct schema when you
    // create your
    // [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob]. If you
    // specify the URI of a table that already exists, it must have the
    // [correct
    // schema](/ml-engine/docs/continuous-evaluation/create-job#table-schema).
    //
    // Provide the table URI in the following format:
    //
    // "bq://<var>{your_project_id}</var>/<var>{your_dataset_name}</var>/<var>{your_table_name}</var>"
    //
    // [Learn
    // more](/ml-engine/docs/continuous-evaluation/create-job#table-schema).
    string input_uri = 1;
  }
  // Configuration of the output data.
  message OutputConfig {
    // Required. The location to write output data to.
    oneof destination {
      // Output to a file in Cloud Storage. Should be used for labeling output
      // other than image segmentation.
      GcsDestination gcs_destination = 1;

      // Output to a folder in Cloud Storage. Should be used for image
      // segmentation labeling output.
      GcsFolderDestination gcs_folder_destination = 2;
    }
  }
  // Export destination of the data. Only a gcs path is allowed in
  // output_uri.
  message GcsDestination {
    // Required. The output URI of the destination file.
    string output_uri = 1;

    // Required. The format of the gcs destination. Only "text/csv" and
    // "application/json" are supported.
    string mime_type = 2;
  }
  // Export folder destination of the data.
  message GcsFolderDestination {
    // Required. The Cloud Storage directory to export data to.
    string output_folder_uri = 1;
  }
  // A DataItem is a piece of data without annotation, for example an image.
  message DataItem {
    option (google.api.resource) = {
      type: "datalabeling.googleapis.com/DataItem"
      pattern: "projects/{project}/datasets/{dataset}/dataItems/{data_item}"
    };

    // Output only. The payload of this data item.
    oneof payload {
      // The image payload, a container of the image bytes/uri.
      ImagePayload image_payload = 2;

      // The text payload, a container of text content.
      TextPayload text_payload = 3;

      // The video payload, a container of the video uri.
      VideoPayload video_payload = 4;
    }

    // Output only. Name of the data item, in the format:
    // projects/{project_id}/datasets/{dataset_id}/dataItems/{data_item_id}
    string name = 1;
  }
  // An AnnotatedDataset is a set that holds annotations for the data in a
  // Dataset. Each labeling task generates an AnnotatedDataset under the
  // Dataset that the task was requested for.
  message AnnotatedDataset {
    option (google.api.resource) = {
      type: "datalabeling.googleapis.com/AnnotatedDataset"
      pattern: "projects/{project}/datasets/{dataset}/annotatedDatasets/{annotated_dataset}"
    };

    // Output only. Resource name of the AnnotatedDataset, in the format:
    // projects/{project_id}/datasets/{dataset_id}/annotatedDatasets/
    // {annotated_dataset_id}
    string name = 1;

    // Output only. The display name of the AnnotatedDataset. It is specified
    // in HumanAnnotationConfig when the user starts a labeling task. Maximum
    // of 64 characters.
    string display_name = 2;

    // Output only. The description of the AnnotatedDataset. It is specified in
    // HumanAnnotationConfig when the user starts a labeling task. Maximum of
    // 10000 characters.
    string description = 9;

    // Output only. Source of the annotation.
    AnnotationSource annotation_source = 3;

    // Output only. Type of the annotation. It is specified when starting the
    // labeling task.
    AnnotationType annotation_type = 8;

    // Output only. Number of examples in the annotated dataset.
    int64 example_count = 4;

    // Output only. Number of examples in the annotated dataset that have an
    // annotation.
    int64 completed_example_count = 5;

    // Output only. Per-label statistics.
    LabelStats label_stats = 6;

    // Output only. Time the AnnotatedDataset was created.
    google.protobuf.Timestamp create_time = 7;

    // Output only. Additional information about the AnnotatedDataset.
    AnnotatedDatasetMetadata metadata = 10;

    // Output only. The names of any related resources that are blocking
    // changes to the annotated dataset.
    repeated string blocking_resources = 11;
  }
  // Statistics about annotation specs.
  message LabelStats {
    // Example count for each annotation spec. The key is the annotation spec
    // name and the value is the number of examples for that annotation spec.
    // If the annotated dataset does not have an annotation spec, the map
    // returns a pair whose key is the empty string and whose value is the
    // total number of annotations.
    map<string, int64> example_count = 1;
  }
  // Metadata on an AnnotatedDataset.
  message AnnotatedDatasetMetadata {
    // The specific request configuration that was used when requesting the
    // labeling task.
    oneof annotation_request_config {
      // Configuration for an image classification task.
      ImageClassificationConfig image_classification_config = 2;

      // Configuration for an image bounding box and bounding poly task.
      BoundingPolyConfig bounding_poly_config = 3;

      // Configuration for an image polyline task.
      PolylineConfig polyline_config = 4;

      // Configuration for an image segmentation task.
      SegmentationConfig segmentation_config = 5;

      // Configuration for a video classification task.
      VideoClassificationConfig video_classification_config = 6;

      // Configuration for a video object detection task.
      ObjectDetectionConfig object_detection_config = 7;

      // Configuration for a video object tracking task.
      ObjectTrackingConfig object_tracking_config = 8;

      // Configuration for a video event labeling task.
      EventConfig event_config = 9;

      // Configuration for a text classification task.
      TextClassificationConfig text_classification_config = 10;

      // Configuration for a text entity extraction task.
      TextEntityExtractionConfig text_entity_extraction_config = 11;
    }

    // The HumanAnnotationConfig used when requesting the human labeling task
    // for this AnnotatedDataset.
    HumanAnnotationConfig human_annotation_config = 1;
  }
  // An Example is a piece of data together with its annotation, for example an
  // image with the label "house".
  message Example {
    option (google.api.resource) = {
      type: "datalabeling.googleapis.com/Example"
      pattern: "projects/{project}/datasets/{dataset}/annotatedDatasets/{annotated_dataset}/examples/{example}"
    };

    // Output only. The data part of the Example.
    oneof payload {
      // The image payload, a container of the image bytes/uri.
      ImagePayload image_payload = 2;

      // The text payload, a container of the text content.
      TextPayload text_payload = 6;

      // The video payload, a container of the video uri.
      VideoPayload video_payload = 7;
    }

    // Output only. Name of the example, in the format:
    // projects/{project_id}/datasets/{dataset_id}/annotatedDatasets/
    // {annotated_dataset_id}/examples/{example_id}
    string name = 1;

    // Output only. Annotations for the piece of data in the Example.
    // One piece of data can have multiple annotations.
    repeated Annotation annotations = 5;
  }