// File: text_annotation.proto
  // Copyright 2019 Google LLC.
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
  // You may obtain a copy of the License at
  //
  //     http://www.apache.org/licenses/LICENSE-2.0
  //
  // Unless required by applicable law or agreed to in writing, software
  // distributed under the License is distributed on an "AS IS" BASIS,
  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  // See the License for the specific language governing permissions and
  // limitations under the License.
  //
  // Schema dialect for every definition in this file.
  syntax = "proto3";

  // All messages below live in the Cloud Vision v1p4beta1 package.
  package google.cloud.vision.v1p4beta1;

  // NOTE(review): no definition visible in this file appears to use anything
  // from annotations.proto -- confirm whether this import is still required.
  import "google/api/annotations.proto";
  // Supplies BoundingPoly, used by the bounding_box fields below.
  import "google/cloud/vision/v1p4beta1/geometry.proto";

  // Code-generation options for the individual target languages.
  option cc_enable_arenas = true;
  option go_package = "google.golang.org/genproto/googleapis/cloud/vision/v1p4beta1;vision";
  option java_multiple_files = true;
  option java_outer_classname = "TextAnnotationProto";
  option java_package = "com.google.cloud.vision.v1p4beta1";
  option objc_class_prefix = "GCVN";
  // TextAnnotation contains a structured representation of OCR extracted text.
  // The hierarchy of an OCR extracted text structure is like this:
  //
  //     TextAnnotation -> Page -> Block -> Paragraph -> Word -> Symbol
  //
  // Each structural component, starting from Page, may further have its own
  // properties. Properties describe detected languages, breaks, etc. Please
  // refer to the
  // [TextAnnotation.TextProperty][google.cloud.vision.v1p4beta1.TextAnnotation.TextProperty]
  // message definition below for more detail.
  message TextAnnotation {
    // Detected language for a structural component.
    message DetectedLanguage {
      // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
      // information, see
      // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
      string language_code = 1;

      // Confidence of detected language. Range [0, 1].
      float confidence = 2;
    }

    // Detected start or end of a structural component.
    message DetectedBreak {
      // Enum to denote the type of break found: new line, space, etc.
      enum BreakType {
        // Unknown break label type.
        UNKNOWN = 0;

        // Regular space.
        SPACE = 1;

        // Sure space (very wide).
        SURE_SPACE = 2;

        // Line-wrapping break.
        EOL_SURE_SPACE = 3;

        // End-line hyphen that is not present in text; does not co-occur with
        // `SPACE` or `LINE_BREAK`.
        // NOTE(review): this comment previously also listed `LEADER_SPACE`,
        // which is not a value of this enum; presumably `SURE_SPACE` was
        // meant -- confirm before relying on it.
        HYPHEN = 4;

        // Line break that ends a paragraph.
        LINE_BREAK = 5;
      }

      // Detected break type.
      BreakType type = 1;

      // True if break prepends the element.
      bool is_prefix = 2;
    }

    // Additional information detected on the structural component.
    message TextProperty {
      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 1;

      // Detected start or end of a text segment.
      DetectedBreak detected_break = 2;
    }

    // List of pages detected by OCR.
    repeated Page pages = 1;

    // UTF-8 text detected on the pages.
    string text = 2;
  }
  // Detected page from OCR.
  message Page {
    // Additional information detected on the page.
    TextAnnotation.TextProperty property = 1;

    // Page width. For PDFs the unit is points. For images (including
    // TIFFs) the unit is pixels.
    int32 width = 2;

    // Page height. For PDFs the unit is points. For images (including
    // TIFFs) the unit is pixels.
    int32 height = 3;

    // List of blocks of text, images, etc. on this page.
    repeated Block blocks = 4;

    // Confidence of the OCR results on the page. Range [0, 1].
    float confidence = 5;
  }
  // Logical element on the page.
  message Block {
    // Type of a block (text, image, etc.) as identified by OCR.
    enum BlockType {
      // Unknown block type.
      UNKNOWN = 0;

      // Regular text block.
      TEXT = 1;

      // Table block.
      TABLE = 2;

      // Image block.
      PICTURE = 3;

      // Horizontal/vertical line box.
      RULER = 4;

      // Barcode block.
      BARCODE = 5;
    }

    // Additional information detected for the block.
    TextAnnotation.TextProperty property = 1;

    // The bounding box for the block.
    // The vertices are in the order of top-left, top-right, bottom-right,
    // bottom-left. When a rotation of the bounding box is detected the rotation
    // is represented as around the top-left corner as defined when the text is
    // read in the 'natural' orientation.
    // For example:
    //
    // * when the text is horizontal it might look like:
    //
    //         0----1
    //         |    |
    //         3----2
    //
    // * when it's rotated 180 degrees around the top-left corner it becomes:
    //
    //         2----3
    //         |    |
    //         1----0
    //
    //   and the vertex order will still be (0, 1, 2, 3).
    BoundingPoly bounding_box = 2;

    // List of paragraphs in this block (if this block is of type text).
    repeated Paragraph paragraphs = 3;

    // Detected block type (text, image, etc.) for this block.
    BlockType block_type = 4;

    // Confidence of the OCR results on the block. Range [0, 1].
    float confidence = 5;
  }
  // Structural unit of text representing a number of words in certain order.
  message Paragraph {
    // Additional information detected for the paragraph.
    TextAnnotation.TextProperty property = 1;

    // The bounding box for the paragraph.
    // The vertices are in the order of top-left, top-right, bottom-right,
    // bottom-left. When a rotation of the bounding box is detected the rotation
    // is represented as around the top-left corner as defined when the text is
    // read in the 'natural' orientation.
    // For example:
    //
    // * when the text is horizontal it might look like:
    //
    //         0----1
    //         |    |
    //         3----2
    //
    // * when it's rotated 180 degrees around the top-left corner it becomes:
    //
    //         2----3
    //         |    |
    //         1----0
    //
    //   and the vertex order will still be (0, 1, 2, 3).
    BoundingPoly bounding_box = 2;

    // List of all words in this paragraph.
    repeated Word words = 3;

    // Confidence of the OCR results for the paragraph. Range [0, 1].
    float confidence = 4;
  }
  // A word representation.
  message Word {
    // Additional information detected for the word.
    TextAnnotation.TextProperty property = 1;

    // The bounding box for the word.
    // The vertices are in the order of top-left, top-right, bottom-right,
    // bottom-left. When a rotation of the bounding box is detected the rotation
    // is represented as around the top-left corner as defined when the text is
    // read in the 'natural' orientation.
    // For example:
    //
    // * when the text is horizontal it might look like:
    //
    //         0----1
    //         |    |
    //         3----2
    //
    // * when it's rotated 180 degrees around the top-left corner it becomes:
    //
    //         2----3
    //         |    |
    //         1----0
    //
    //   and the vertex order will still be (0, 1, 2, 3).
    BoundingPoly bounding_box = 2;

    // List of symbols in the word.
    // The order of the symbols follows the natural reading order.
    repeated Symbol symbols = 3;

    // Confidence of the OCR results for the word. Range [0, 1].
    float confidence = 4;
  }
  // A single symbol representation.
  message Symbol {
    // Additional information detected for the symbol.
    TextAnnotation.TextProperty property = 1;

    // The bounding box for the symbol.
    // The vertices are in the order of top-left, top-right, bottom-right,
    // bottom-left. When a rotation of the bounding box is detected the rotation
    // is represented as around the top-left corner as defined when the text is
    // read in the 'natural' orientation.
    // For example:
    //
    // * when the text is horizontal it might look like:
    //
    //         0----1
    //         |    |
    //         3----2
    //
    // * when it's rotated 180 degrees around the top-left corner it becomes:
    //
    //         2----3
    //         |    |
    //         1----0
    //
    //   and the vertex order will still be (0, 1, 2, 3).
    BoundingPoly bounding_box = 2;

    // The actual UTF-8 representation of the symbol.
    string text = 3;

    // Confidence of the OCR results for the symbol. Range [0, 1].
    float confidence = 4;
  }