// image_annotator.proto

// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.vision.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/cloud/vision/v1/geometry.proto";
import "google/cloud/vision/v1/product_search.proto";
import "google/cloud/vision/v1/text_annotation.proto";
import "google/cloud/vision/v1/web_detection.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";
import "google/type/latlng.proto";

option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/cloud/vision/v1;vision";
option java_multiple_files = true;
option java_outer_classname = "ImageAnnotatorProto";
option java_package = "com.google.cloud.vision.v1";
option objc_class_prefix = "GCVN";

// Service that performs Google Cloud Vision API detection tasks over client
// images, such as face, landmark, logo, label, and text detection. The
// ImageAnnotator service returns detected entities from the images.
service ImageAnnotator {
  option (google.api.default_host) = "vision.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/cloud-vision";

  // Run image detection and annotation for a batch of images.
  rpc BatchAnnotateImages(BatchAnnotateImagesRequest)
      returns (BatchAnnotateImagesResponse) {
    option (google.api.http) = {
      post: "/v1/images:annotate"
      body: "*"
      additional_bindings {
        post: "/v1/{parent=projects/*/locations/*}/images:annotate"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{parent=projects/*}/images:annotate"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests";
  }
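
  // For reference, a minimal JSON request body for the BatchAnnotateImages
  // method above, over its primary REST binding (an illustrative sketch;
  // field names follow the standard proto3 JSON mapping, and the bucket and
  // object names are placeholders):
  //
  //     POST https://vision.googleapis.com/v1/images:annotate
  //     {
  //       "requests": [
  //         {
  //           "image": {
  //             "source": {"imageUri": "gs://bucket_name/object_name"}
  //           },
  //           "features": [
  //             {"type": "LABEL_DETECTION", "maxResults": 10}
  //           ]
  //         }
  //       ]
  //     }
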
  // Service that performs image detection and annotation for a batch of files.
  // Currently, only "application/pdf", "image/tiff" and "image/gif" are
  // supported.
  //
  // This service will extract at most 5 frames (GIF) or pages (PDF or TIFF)
  // from each file provided (customers can specify which 5 in
  // AnnotateFileRequest.pages) and perform detection and annotation for each
  // image extracted.
  rpc BatchAnnotateFiles(BatchAnnotateFilesRequest)
      returns (BatchAnnotateFilesResponse) {
    option (google.api.http) = {
      post: "/v1/files:annotate"
      body: "*"
      additional_bindings {
        post: "/v1/{parent=projects/*/locations/*}/files:annotate"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{parent=projects/*}/files:annotate"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests";
  }

  // Run asynchronous image detection and annotation for a list of images.
  //
  // Progress and results can be retrieved through the
  // `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `OperationMetadata` (metadata).
  // `Operation.response` contains `AsyncBatchAnnotateImagesResponse` (results).
  //
  // This service will write image annotation outputs to JSON files in the
  // customer's GCS bucket, with each JSON file containing one
  // BatchAnnotateImagesResponse proto.
  rpc AsyncBatchAnnotateImages(AsyncBatchAnnotateImagesRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/images:asyncBatchAnnotate"
      body: "*"
      additional_bindings {
        post: "/v1/{parent=projects/*/locations/*}/images:asyncBatchAnnotate"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{parent=projects/*}/images:asyncBatchAnnotate"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests,output_config";
    option (google.longrunning.operation_info) = {
      response_type: "AsyncBatchAnnotateImagesResponse"
      metadata_type: "OperationMetadata"
    };
  }

  // Run asynchronous image detection and annotation for a list of generic
  // files, such as PDF files, which may contain multiple pages and multiple
  // images per page. Progress and results can be retrieved through the
  // `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `OperationMetadata` (metadata).
  // `Operation.response` contains `AsyncBatchAnnotateFilesResponse` (results).
  rpc AsyncBatchAnnotateFiles(AsyncBatchAnnotateFilesRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/files:asyncBatchAnnotate"
      body: "*"
      additional_bindings {
        post: "/v1/{parent=projects/*/locations/*}/files:asyncBatchAnnotate"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{parent=projects/*}/files:asyncBatchAnnotate"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests";
    option (google.longrunning.operation_info) = {
      response_type: "AsyncBatchAnnotateFilesResponse"
      metadata_type: "OperationMetadata"
    };
  }
}
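
// For reference, a sketch of the asynchronous file-annotation flow over the
// AsyncBatchAnnotateFiles REST binding above (illustrative only; the bucket
// names, object names and mime type are placeholder assumptions, and field
// names follow the standard proto3 JSON mapping):
//
//     POST https://vision.googleapis.com/v1/files:asyncBatchAnnotate
//     {
//       "requests": [
//         {
//           "inputConfig": {
//             "gcsSource": {"uri": "gs://bucket_name/input.pdf"},
//             "mimeType": "application/pdf"
//           },
//           "features": [{"type": "DOCUMENT_TEXT_DETECTION"}],
//           "outputConfig": {
//             "gcsDestination": {"uri": "gs://bucket_name/output/"},
//             "batchSize": 20
//           }
//         }
//       ]
//     }
//
// The call returns a `google.longrunning.Operation`; poll it with the
// Operations `GetOperation` method until `done` is true, then read the JSON
// result files from the configured `gcs_destination`.
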
// The type of Google Cloud Vision API detection to perform, and the maximum
// number of results to return for that type. Multiple `Feature` objects can
// be specified in the `features` list.
message Feature {
  // Type of Google Cloud Vision API feature to be extracted.
  enum Type {
    // Unspecified feature type.
    TYPE_UNSPECIFIED = 0;
    // Run face detection.
    FACE_DETECTION = 1;
    // Run landmark detection.
    LANDMARK_DETECTION = 2;
    // Run logo detection.
    LOGO_DETECTION = 3;
    // Run label detection.
    LABEL_DETECTION = 4;
    // Run text detection / optical character recognition (OCR). Text detection
    // is optimized for areas of text within a larger image; if the image is
    // a document, use `DOCUMENT_TEXT_DETECTION` instead.
    TEXT_DETECTION = 5;
    // Run dense text document OCR. Takes precedence when both
    // `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` are present.
    DOCUMENT_TEXT_DETECTION = 11;
    // Run Safe Search to detect potentially unsafe
    // or undesirable content.
    SAFE_SEARCH_DETECTION = 6;
    // Compute a set of image properties, such as the
    // image's dominant colors.
    IMAGE_PROPERTIES = 7;
    // Run crop hints.
    CROP_HINTS = 9;
    // Run web detection.
    WEB_DETECTION = 10;
    // Run Product Search.
    PRODUCT_SEARCH = 12;
    // Run localizer for object detection.
    OBJECT_LOCALIZATION = 19;
  }

  // The feature type.
  Type type = 1;

  // Maximum number of results of this type. Does not apply to
  // `TEXT_DETECTION`, `DOCUMENT_TEXT_DETECTION`, or `CROP_HINTS`.
  int32 max_results = 2;

  // Model to use for the feature.
  // Supported values: "builtin/stable" (the default if unset) and
  // "builtin/latest".
  string model = 3;
}
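
// For reference, a single `Feature` in JSON form (an illustrative sketch;
// field names follow the standard proto3 JSON mapping):
//
//     {"type": "LABEL_DETECTION", "maxResults": 10, "model": "builtin/stable"}
//
// Per the comments above, `maxResults` would be ignored for `TEXT_DETECTION`,
// `DOCUMENT_TEXT_DETECTION`, and `CROP_HINTS`.
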
// External image source (Google Cloud Storage or web URL image location).
message ImageSource {
  // **Use `image_uri` instead.**
  //
  // The Google Cloud Storage URI of the form
  // `gs://bucket_name/object_name`. Object versioning is not supported. See
  // [Google Cloud Storage Request
  // URIs](https://cloud.google.com/storage/docs/reference-uris) for more info.
  string gcs_image_uri = 1;

  // The URI of the source image. Can be either:
  //
  // 1. A Google Cloud Storage URI of the form
  //    `gs://bucket_name/object_name`. Object versioning is not supported. See
  //    [Google Cloud Storage Request
  //    URIs](https://cloud.google.com/storage/docs/reference-uris) for more
  //    info.
  //
  // 2. A publicly-accessible image HTTP/HTTPS URL. When fetching images from
  //    HTTP/HTTPS URLs, Google cannot guarantee that the request will be
  //    completed. Your request may fail if the specified host denies the
  //    request (e.g. due to request throttling or DOS prevention), or if
  //    Google throttles requests to the site for abuse prevention. You should
  //    not depend on externally-hosted images for production applications.
  //
  // When both `gcs_image_uri` and `image_uri` are specified, `image_uri` takes
  // precedence.
  string image_uri = 2;
}

// Client image to perform Google Cloud Vision API tasks over.
message Image {
  // Image content, represented as a stream of bytes.
  // Note: As with all `bytes` fields, protobuffers use a pure binary
  // representation, whereas JSON representations use base64.
  //
  // Currently, this field only works for BatchAnnotateImages requests. It does
  // not work for AsyncBatchAnnotateImages requests.
  bytes content = 1;

  // Google Cloud Storage image location, or publicly-accessible image
  // URL. If both `content` and `source` are provided for an image, `content`
  // takes precedence and is used to perform the image annotation request.
  ImageSource source = 2;
}
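
// For reference, the two ways to supply an `Image` in JSON form (illustrative
// sketches; the URI and the truncated base64 payload are placeholders):
//
//     {"source": {"imageUri": "gs://bucket_name/object_name"}}
//     {"content": "/9j/4AAQSkZJRg..."}
//
// Per the comments above, if both fields are set, `content` takes precedence.
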
// A bucketized representation of likelihood, which is intended to give clients
// highly stable results across model upgrades.
enum Likelihood {
  // Unknown likelihood.
  UNKNOWN = 0;
  // It is very unlikely.
  VERY_UNLIKELY = 1;
  // It is unlikely.
  UNLIKELY = 2;
  // It is possible.
  POSSIBLE = 3;
  // It is likely.
  LIKELY = 4;
  // It is very likely.
  VERY_LIKELY = 5;
}

// A face annotation object contains the results of face detection.
message FaceAnnotation {
  // A face-specific landmark (for example, a face feature).
  message Landmark {
    // Face landmark (feature) type.
    // Left and right are defined from the vantage of the viewer of the image
    // without considering mirror projections typical of photos. So, `LEFT_EYE`,
    // typically, is the person's right eye.
    enum Type {
      // Unknown face landmark detected. Should not be filled.
      UNKNOWN_LANDMARK = 0;
      // Left eye.
      LEFT_EYE = 1;
      // Right eye.
      RIGHT_EYE = 2;
      // Left of left eyebrow.
      LEFT_OF_LEFT_EYEBROW = 3;
      // Right of left eyebrow.
      RIGHT_OF_LEFT_EYEBROW = 4;
      // Left of right eyebrow.
      LEFT_OF_RIGHT_EYEBROW = 5;
      // Right of right eyebrow.
      RIGHT_OF_RIGHT_EYEBROW = 6;
      // Midpoint between eyes.
      MIDPOINT_BETWEEN_EYES = 7;
      // Nose tip.
      NOSE_TIP = 8;
      // Upper lip.
      UPPER_LIP = 9;
      // Lower lip.
      LOWER_LIP = 10;
      // Mouth left.
      MOUTH_LEFT = 11;
      // Mouth right.
      MOUTH_RIGHT = 12;
      // Mouth center.
      MOUTH_CENTER = 13;
      // Nose, bottom right.
      NOSE_BOTTOM_RIGHT = 14;
      // Nose, bottom left.
      NOSE_BOTTOM_LEFT = 15;
      // Nose, bottom center.
      NOSE_BOTTOM_CENTER = 16;
      // Left eye, top boundary.
      LEFT_EYE_TOP_BOUNDARY = 17;
      // Left eye, right corner.
      LEFT_EYE_RIGHT_CORNER = 18;
      // Left eye, bottom boundary.
      LEFT_EYE_BOTTOM_BOUNDARY = 19;
      // Left eye, left corner.
      LEFT_EYE_LEFT_CORNER = 20;
      // Right eye, top boundary.
      RIGHT_EYE_TOP_BOUNDARY = 21;
      // Right eye, right corner.
      RIGHT_EYE_RIGHT_CORNER = 22;
      // Right eye, bottom boundary.
      RIGHT_EYE_BOTTOM_BOUNDARY = 23;
      // Right eye, left corner.
      RIGHT_EYE_LEFT_CORNER = 24;
      // Left eyebrow, upper midpoint.
      LEFT_EYEBROW_UPPER_MIDPOINT = 25;
      // Right eyebrow, upper midpoint.
      RIGHT_EYEBROW_UPPER_MIDPOINT = 26;
      // Left ear tragion.
      LEFT_EAR_TRAGION = 27;
      // Right ear tragion.
      RIGHT_EAR_TRAGION = 28;
      // Left eye pupil.
      LEFT_EYE_PUPIL = 29;
      // Right eye pupil.
      RIGHT_EYE_PUPIL = 30;
      // Forehead glabella.
      FOREHEAD_GLABELLA = 31;
      // Chin gnathion.
      CHIN_GNATHION = 32;
      // Chin left gonion.
      CHIN_LEFT_GONION = 33;
      // Chin right gonion.
      CHIN_RIGHT_GONION = 34;
      // Left cheek center.
      LEFT_CHEEK_CENTER = 35;
      // Right cheek center.
      RIGHT_CHEEK_CENTER = 36;
    }

    // Face landmark type.
    Type type = 3;

    // Face landmark position.
    Position position = 4;
  }

  // The bounding polygon around the face. The coordinates of the bounding box
  // are in the original image's scale.
  // The bounding box is computed to "frame" the face in accordance with human
  // expectations. It is based on the landmarker results.
  // Note that one or more x and/or y coordinates may not be generated in the
  // `BoundingPoly` (the polygon will be unbounded) if only a partial face
  // appears in the image to be annotated.
  BoundingPoly bounding_poly = 1;

  // The `fd_bounding_poly` bounding polygon is tighter than the
  // `boundingPoly`, and encloses only the skin part of the face. Typically, it
  // is used to eliminate the face from any image analysis that detects the
  // "amount of skin" visible in an image. It is not based on the
  // landmarker results, only on the initial face detection, hence
  // the <code>fd</code> (face detection) prefix.
  BoundingPoly fd_bounding_poly = 2;

  // Detected face landmarks.
  repeated Landmark landmarks = 3;

  // Roll angle, which indicates the amount of clockwise/anti-clockwise rotation
  // of the face relative to the image vertical about the axis perpendicular to
  // the face. Range [-180,180].
  float roll_angle = 4;

  // Yaw angle, which indicates the leftward/rightward angle that the face is
  // pointing relative to the vertical plane perpendicular to the image. Range
  // [-180,180].
  float pan_angle = 5;

  // Pitch angle, which indicates the upwards/downwards angle that the face is
  // pointing relative to the image's horizontal plane. Range [-180,180].
  float tilt_angle = 6;

  // Detection confidence. Range [0, 1].
  float detection_confidence = 7;

  // Face landmarking confidence. Range [0, 1].
  float landmarking_confidence = 8;

  // Joy likelihood.
  Likelihood joy_likelihood = 9;

  // Sorrow likelihood.
  Likelihood sorrow_likelihood = 10;

  // Anger likelihood.
  Likelihood anger_likelihood = 11;

  // Surprise likelihood.
  Likelihood surprise_likelihood = 12;

  // Under-exposed likelihood.
  Likelihood under_exposed_likelihood = 13;

  // Blurred likelihood.
  Likelihood blurred_likelihood = 14;

  // Headwear likelihood.
  Likelihood headwear_likelihood = 15;
}

// Detected entity location information.
message LocationInfo {
  // lat/long location coordinates.
  google.type.LatLng lat_lng = 1;
}

// A `Property` consists of a user-supplied name/value pair.
message Property {
  // Name of the property.
  string name = 1;

  // Value of the property.
  string value = 2;

  // Value of numeric properties.
  uint64 uint64_value = 3;
}

// Set of detected entity features.
message EntityAnnotation {
  // Opaque entity ID. Some IDs may be available in
  // [Google Knowledge Graph Search
  // API](https://developers.google.com/knowledge-graph/).
  string mid = 1;

  // The language code for the locale in which the entity textual
  // `description` is expressed.
  string locale = 2;

  // Entity textual description, expressed in its `locale` language.
  string description = 3;

  // Overall score of the result. Range [0, 1].
  float score = 4;

  // **Deprecated. Use `score` instead.**
  // The accuracy of the entity detection in an image.
  // For example, for an image in which the "Eiffel Tower" entity is detected,
  // this field represents the confidence that there is a tower in the query
  // image. Range [0, 1].
  float confidence = 5 [deprecated = true];

  // The relevancy of the ICA (Image Content Annotation) label to the
  // image. For example, the relevancy of "tower" is likely higher to an image
  // containing the detected "Eiffel Tower" than to an image containing a
  // detected distant towering building, even though the confidence that
  // there is a tower in each image may be the same. Range [0, 1].
  float topicality = 6;

  // Image region to which this entity belongs. Not produced
  // for `LABEL_DETECTION` features.
  BoundingPoly bounding_poly = 7;

  // The location information for the detected entity. Multiple
  // `LocationInfo` elements can be present because one location may
  // indicate the location of the scene in the image, and another location
  // may indicate the location of the place where the image was taken.
  // Location information is usually present for landmarks.
  repeated LocationInfo locations = 8;

  // Some entities may have optional user-supplied `Property` (name/value)
  // fields, such as a score or string that qualifies the entity.
  repeated Property properties = 9;
}

// Set of detected objects with bounding boxes.
message LocalizedObjectAnnotation {
  // Object ID that should align with EntityAnnotation mid.
  string mid = 1;

  // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
  // information, see
  // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
  string language_code = 2;

  // Object name, expressed in its `language_code` language.
  string name = 3;

  // Score of the result. Range [0, 1].
  float score = 4;

  // Image region to which this object belongs. This must be populated.
  BoundingPoly bounding_poly = 5;
}

// Set of features pertaining to the image, computed by computer vision
// methods over safe-search verticals (for example, adult, spoof, medical,
// violence).
message SafeSearchAnnotation {
  // Represents the adult content likelihood for the image. Adult content may
  // contain elements such as nudity, pornographic images or cartoons, or
  // sexual activities.
  Likelihood adult = 1;

  // Spoof likelihood. The likelihood that a modification
  // was made to the image's canonical version to make it appear
  // funny or offensive.
  Likelihood spoof = 2;

  // Likelihood that this is a medical image.
  Likelihood medical = 3;

  // Likelihood that this image contains violent content.
  Likelihood violence = 4;

  // Likelihood that the request image contains racy content. Racy content may
  // include (but is not limited to) skimpy or sheer clothing, strategically
  // covered nudity, lewd or provocative poses, or close-ups of sensitive
  // body areas.
  Likelihood racy = 9;

  // Confidence of adult_score. Range [0, 1]. 0 means not confident, 1 means
  // very confident.
  float adult_confidence = 16 [deprecated = true];

  // Confidence of spoof_score. Range [0, 1]. 0 means not confident, 1 means
  // very confident.
  float spoof_confidence = 18 [deprecated = true];

  // Confidence of medical_score. Range [0, 1]. 0 means not confident, 1 means
  // very confident.
  float medical_confidence = 20 [deprecated = true];

  // Confidence of violence_score. Range [0, 1]. 0 means not confident, 1 means
  // very confident.
  float violence_confidence = 22 [deprecated = true];

  // Confidence of racy_score. Range [0, 1]. 0 means not confident, 1 means
  // very confident.
  float racy_confidence = 24 [deprecated = true];

  // Confidence of nsfw_score. Range [0, 1]. 0 means not confident, 1 means
  // very confident.
  float nsfw_confidence = 26 [deprecated = true];
}
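
// For reference, a `SafeSearchAnnotation` as it might appear in a JSON
// response (an illustrative sketch; enum values serialize as their names
// under the proto3 JSON mapping, and the particular values are placeholders):
//
//     {
//       "adult": "VERY_UNLIKELY",
//       "spoof": "UNLIKELY",
//       "medical": "VERY_UNLIKELY",
//       "violence": "POSSIBLE",
//       "racy": "UNLIKELY"
//     }
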
// Rectangle determined by min and max `LatLng` pairs.
message LatLongRect {
  // Min lat/long pair.
  google.type.LatLng min_lat_lng = 1;

  // Max lat/long pair.
  google.type.LatLng max_lat_lng = 2;
}

// Color information consists of RGB channels, score, and the fraction of
// the image that the color occupies in the image.
message ColorInfo {
  // RGB components of the color.
  google.type.Color color = 1;

  // Image-specific score for this color. Value in range [0, 1].
  float score = 2;

  // The fraction of pixels the color occupies in the image.
  // Value in range [0, 1].
  float pixel_fraction = 3;
}

// Set of dominant colors and their corresponding scores.
message DominantColorsAnnotation {
  // RGB color values with their score and pixel fraction.
  repeated ColorInfo colors = 1;
}

// Stores image properties, such as dominant colors.
message ImageProperties {
  // If present, dominant colors completed successfully.
  DominantColorsAnnotation dominant_colors = 1;
}

// Single crop hint that is used to generate a new crop when serving an image.
message CropHint {
  // The bounding polygon for the crop region. The coordinates of the bounding
  // box are in the original image's scale.
  BoundingPoly bounding_poly = 1;

  // Confidence of this being a salient region. Range [0, 1].
  float confidence = 2;

  // Fraction of importance of this salient region with respect to the original
  // image.
  float importance_fraction = 3;
}

// Set of crop hints that are used to generate new crops when serving images.
message CropHintsAnnotation {
  // Crop hint results.
  repeated CropHint crop_hints = 1;
}

// Parameters for crop hints annotation request.
message CropHintsParams {
  // Aspect ratios in floats, representing the ratio of the width to the height
  // of the image. For example, if the desired aspect ratio is 4/3, the
  // corresponding float value should be 1.33333. If not specified, the
  // best possible crop is returned. The number of provided aspect ratios is
  // limited to a maximum of 16; any aspect ratios provided after the 16th are
  // ignored.
  repeated float aspect_ratios = 1;
}

// Parameters for web detection request.
message WebDetectionParams {
  // Whether to include results derived from the geo information in the image.
  bool include_geo_results = 2;
}

// Parameters for text detections. This is used to control TEXT_DETECTION and
// DOCUMENT_TEXT_DETECTION features.
message TextDetectionParams {
  // By default, the Cloud Vision API only includes a confidence score for
  // DOCUMENT_TEXT_DETECTION results. Set this flag to true to include a
  // confidence score for TEXT_DETECTION as well.
  bool enable_text_detection_confidence_score = 9;
}

// Image context and/or feature-specific parameters.
message ImageContext {
  // Not used.
  LatLongRect lat_long_rect = 1;

  // List of languages to use for TEXT_DETECTION. In most cases, an empty value
  // yields the best results since it enables automatic language detection. For
  // languages based on the Latin alphabet, setting `language_hints` is not
  // needed. In rare cases, when the language of the text in the image is known,
  // setting a hint will help get better results (although it will be a
  // significant hindrance if the hint is wrong). Text detection returns an
  // error if one or more of the specified languages is not one of the
  // [supported languages](https://cloud.google.com/vision/docs/languages).
  repeated string language_hints = 2;

  // Parameters for crop hints annotation request.
  CropHintsParams crop_hints_params = 4;

  // Parameters for product search.
  ProductSearchParams product_search_params = 5;

  // Parameters for web detection.
  WebDetectionParams web_detection_params = 6;

  // Parameters for text detection and document text detection.
  TextDetectionParams text_detection_params = 12;
}
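
// For reference, an `ImageContext` in JSON form combining two of the
// parameter messages above (an illustrative sketch; the hint values are
// placeholders, and 1.33333 encodes a 4/3 aspect ratio per the
// `CropHintsParams` comment):
//
//     {
//       "languageHints": ["en", "fr"],
//       "cropHintsParams": {"aspectRatios": [1.33333, 1.0]}
//     }
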
// Request for performing Google Cloud Vision API tasks over a user-provided
// image, with user-requested features, and with context information.
message AnnotateImageRequest {
  // The image to be processed.
  Image image = 1;

  // Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image.
  ImageContext image_context = 3;
}

// If an image was produced from a file (e.g. a PDF), this message gives
// information about the source of that image.
message ImageAnnotationContext {
  // The URI of the file used to produce the image.
  string uri = 1;

  // If the file was a PDF or TIFF, this field gives the page number within
  // the file used to produce the image.
  int32 page_number = 2;
}

// Response to an image annotation request.
message AnnotateImageResponse {
  // If present, face detection has completed successfully.
  repeated FaceAnnotation face_annotations = 1;

  // If present, landmark detection has completed successfully.
  repeated EntityAnnotation landmark_annotations = 2;

  // If present, logo detection has completed successfully.
  repeated EntityAnnotation logo_annotations = 3;

  // If present, label detection has completed successfully.
  repeated EntityAnnotation label_annotations = 4;

  // If present, localized object detection has completed successfully.
  // This will be sorted descending by confidence score.
  repeated LocalizedObjectAnnotation localized_object_annotations = 22;

  // If present, text (OCR) detection has completed successfully.
  repeated EntityAnnotation text_annotations = 5;

  // If present, text (OCR) detection or document (OCR) text detection has
  // completed successfully.
  // This annotation provides the structural hierarchy for the OCR detected
  // text.
  TextAnnotation full_text_annotation = 12;

  // If present, safe-search annotation has completed successfully.
  SafeSearchAnnotation safe_search_annotation = 6;

  // If present, image properties were extracted successfully.
  ImageProperties image_properties_annotation = 8;

  // If present, crop hints have completed successfully.
  CropHintsAnnotation crop_hints_annotation = 11;

  // If present, web detection has completed successfully.
  WebDetection web_detection = 13;

  // If present, product search has completed successfully.
  ProductSearchResults product_search_results = 14;

  // If set, represents the error message for the operation.
  // Note that filled-in image annotations are guaranteed to be
  // correct, even when `error` is set.
  google.rpc.Status error = 9;

  // If present, contextual information is needed to understand where this
  // image comes from.
  ImageAnnotationContext context = 21;
}
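
// For reference, a fragment of an `AnnotateImageResponse` to a
// `LABEL_DETECTION` request, in JSON form (an illustrative sketch; the mid,
// description and score values are made-up placeholders):
//
//     {
//       "labelAnnotations": [
//         {"mid": "/m/0bt9lr", "description": "dog",
//          "score": 0.97, "topicality": 0.97}
//       ]
//     }
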
// Multiple image annotation requests are batched into a single service call.
message BatchAnnotateImagesRequest {
  // Required. Individual image annotation requests for this batch.
  repeated AnnotateImageRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];

  // Optional. Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no parent is specified, a region will be chosen automatically.
  //
  // Supported location-ids:
  //     `us`: USA country only,
  //     `asia`: East asia areas, like Japan, Taiwan,
  //     `eu`: The European Union.
  //
  // Example: `projects/project-A/locations/eu`.
  string parent = 4;
}

// Response to a batch image annotation request.
message BatchAnnotateImagesResponse {
  // Individual responses to image annotation requests within the batch.
  repeated AnnotateImageResponse responses = 1;
}

// A request to annotate one single file, e.g. a PDF, TIFF or GIF file.
message AnnotateFileRequest {
  // Required. Information about the input file.
  InputConfig input_config = 1;

  // Required. Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image(s) in the file.
  ImageContext image_context = 3;

  // Pages of the file to perform image annotation on.
  //
  // Pages start from 1; the first page of the file is page 1.
  // At most 5 pages are supported per request. Pages can be negative.
  //
  // Page 1 means the first page.
  // Page 2 means the second page.
  // Page -1 means the last page.
  // Page -2 means the second-to-last page.
  //
  // If the file is GIF instead of PDF or TIFF, page refers to GIF frames.
  //
  // If this field is empty, by default the service performs image annotation
  // for the first 5 pages of the file.
  repeated int32 pages = 4;
}
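
// For reference, a `pages` selection in JSON form (an illustrative sketch):
// per the conventions above, this annotates the first two pages and the last
// page of the file:
//
//     {"pages": [1, 2, -1]}
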
// Response to a single file annotation request. A file may contain one or more
// images, which individually have their own responses.
message AnnotateFileResponse {
  // Information about the file for which this response is generated.
  InputConfig input_config = 1;

  // Individual responses to images found within the file. This field will be
  // empty if the `error` field is set.
  repeated AnnotateImageResponse responses = 2;

  // This field gives the total number of pages in the file.
  int32 total_pages = 3;

  // If set, represents the error message for the failed request. The
  // `responses` field will not be set in this case.
  google.rpc.Status error = 4;
}

// A list of requests to annotate files using the BatchAnnotateFiles API.
message BatchAnnotateFilesRequest {
  // Required. The list of file annotation requests. Right now we support only
  // one AnnotateFileRequest in BatchAnnotateFilesRequest.
  repeated AnnotateFileRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];

  // Optional. Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no parent is specified, a region will be chosen automatically.
  //
  // Supported location-ids:
  //     `us`: USA country only,
  //     `asia`: East asia areas, like Japan, Taiwan,
  //     `eu`: The European Union.
  //
  // Example: `projects/project-A/locations/eu`.
  string parent = 3;
}

// A list of file annotation responses.
message BatchAnnotateFilesResponse {
  // The list of file annotation responses, each response corresponding to each
  // AnnotateFileRequest in BatchAnnotateFilesRequest.
  repeated AnnotateFileResponse responses = 1;
}

// An offline file annotation request.
message AsyncAnnotateFileRequest {
  // Required. Information about the input file.
  InputConfig input_config = 1;

  // Required. Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image(s) in the file.
  ImageContext image_context = 3;

  // Required. The desired output location and metadata (e.g. format).
  OutputConfig output_config = 4;
}

// The response for a single offline file annotation request.
message AsyncAnnotateFileResponse {
  // The output location and metadata from AsyncAnnotateFileRequest.
  OutputConfig output_config = 1;
}

// Request for async image annotation for a list of images.
message AsyncBatchAnnotateImagesRequest {
  // Required. Individual image annotation requests for this batch.
  repeated AnnotateImageRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];

  // Required. The desired output location and metadata (e.g. format).
  OutputConfig output_config = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no parent is specified, a region will be chosen automatically.
  //
  // Supported location-ids:
  //     `us`: USA country only,
  //     `asia`: East asia areas, like Japan, Taiwan,
  //     `eu`: The European Union.
  //
  // Example: `projects/project-A/locations/eu`.
  string parent = 4;
}

// Response to an async batch image annotation request.
message AsyncBatchAnnotateImagesResponse {
  // The output location and metadata from AsyncBatchAnnotateImagesRequest.
  OutputConfig output_config = 1;
}

// Multiple async file annotation requests are batched into a single service
// call.
message AsyncBatchAnnotateFilesRequest {
  // Required. Individual async file annotation requests for this batch.
  repeated AsyncAnnotateFileRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];

  // Optional. Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no parent is specified, a region will be chosen automatically.
  //
  // Supported location-ids:
  //     `us`: USA country only,
  //     `asia`: East asia areas, like Japan, Taiwan,
  //     `eu`: The European Union.
  //
  // Example: `projects/project-A/locations/eu`.
  string parent = 4;
}

// Response to an async batch file annotation request.
message AsyncBatchAnnotateFilesResponse {
  // The list of file annotation responses, one for each request in
  // AsyncBatchAnnotateFilesRequest.
  repeated AsyncAnnotateFileResponse responses = 1;
}

// The desired input location and metadata.
message InputConfig {
  // The Google Cloud Storage location to read the input from.
  GcsSource gcs_source = 1;

  // File content, represented as a stream of bytes.
  // Note: As with all `bytes` fields, protobuffers use a pure binary
  // representation, whereas JSON representations use base64.
  //
  // Currently, this field only works for BatchAnnotateFiles requests. It does
  // not work for AsyncBatchAnnotateFiles requests.
  bytes content = 3;

  // The type of the file. Currently only "application/pdf", "image/tiff" and
  // "image/gif" are supported. Wildcards are not supported.
  string mime_type = 2;
}
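
// For reference, the inline alternative to `gcs_source` for a
// BatchAnnotateFiles request, as an `InputConfig` in JSON form (an
// illustrative sketch; the truncated base64 payload is a placeholder):
//
//     {"content": "JVBERi0xLjc...", "mimeType": "application/pdf"}
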
// The desired output location and metadata.
message OutputConfig {
  // The Google Cloud Storage location to write the output(s) to.
  GcsDestination gcs_destination = 1;

  // The max number of response protos to put into each output JSON file on
  // Google Cloud Storage.
  // The valid range is [1, 100]. If not specified, the default value is 20.
  //
  // For example, for one PDF file with 100 pages, 100 response protos will
  // be generated. If `batch_size` = 20, then 5 JSON files, each
  // containing 20 response protos, will be written under the prefix
  // `gcs_destination`.`uri`.
  //
  // Currently, batch_size only applies to GcsDestination, with potential
  // future support for other output configurations.
  int32 batch_size = 2;
}
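
// For reference, an `OutputConfig` in JSON form matching the worked example
// above (an illustrative sketch; the destination URI is a placeholder). The
// number of output files is total responses divided by batch size, so 100
// response protos with `batchSize: 20` yields 100 / 20 = 5 output JSON files:
//
//     {
//       "gcsDestination": {"uri": "gs://bucket_name/output/"},
//       "batchSize": 20
//     }
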
// The Google Cloud Storage location where the input will be read from.
message GcsSource {
  // Google Cloud Storage URI for the input file. This must only be a
  // Google Cloud Storage object. Wildcards are not currently supported.
  string uri = 1;
}

// The Google Cloud Storage location where the output will be written to.
message GcsDestination {
  // Google Cloud Storage URI prefix where the results will be stored. Results
  // will be in JSON format, each preceded by its corresponding input URI
  // prefix. This field can represent either a gcs file prefix or a gcs
  // directory. In either case, the uri should be unique, because in order to
  // get all of the output files you will need to do a wildcard gcs search on
  // the uri prefix you provide.
  //
  // Examples:
  //
  // *    File Prefix: gs://bucket-name/here/filenameprefix   The output files
  //      will be created in gs://bucket-name/here/ and the names of the
  //      output files will begin with "filenameprefix".
  //
  // *    Directory Prefix: gs://bucket-name/some/location/   The output files
  //      will be created in gs://bucket-name/some/location/ and the names of
  //      the output files could be anything because there was no filename
  //      prefix specified.
  //
  // If there are multiple output files, each response is still an
  // AnnotateFileResponse, and each contains some subset of the full list of
  // AnnotateImageResponse. Multiple output files can happen if, for example,
  // the output JSON is too large and overflows into multiple sharded files.
  string uri = 1;
}
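
// For reference, a sketch of the wildcard search described above, collecting
// the sharded output files for the File Prefix example (assumes the gsutil
// CLI; the bucket and prefix are the placeholders from that example):
//
//     gsutil ls gs://bucket-name/here/filenameprefix*
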
// Contains metadata for the BatchAnnotateImages operation.
message OperationMetadata {
  // Batch operation states.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;
    // Request is received.
    CREATED = 1;
    // Request is actively being processed.
    RUNNING = 2;
    // The batch processing is done.
    DONE = 3;
    // The batch processing was cancelled.
    CANCELLED = 4;
  }

  // Current state of the batch operation.
  State state = 1;

  // The time when the batch request was received.
  google.protobuf.Timestamp create_time = 5;

  // The time when the operation result was last updated.
  google.protobuf.Timestamp update_time = 6;
}