// document.proto
  1. // Copyright 2020 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. syntax = "proto3";
  15. package google.cloud.documentai.v1beta2;
  16. import "google/api/field_behavior.proto";
  17. import "google/cloud/documentai/v1beta2/geometry.proto";
  18. import "google/rpc/status.proto";
  19. import "google/type/color.proto";
  20. import "google/api/annotations.proto";
  21. option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta2";
  22. option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta2;documentai";
  23. option java_multiple_files = true;
  24. option java_outer_classname = "DocumentProto";
  25. option java_package = "com.google.cloud.documentai.v1beta2";
  26. option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta2";
  27. option ruby_package = "Google::Cloud::DocumentAI::V1beta2";
  28. // Document represents the canonical document resource in Document Understanding
  29. // AI.
  30. // It is an interchange format that provides insights into documents and allows
  31. // for collaboration between users and Document Understanding AI to iterate and
  32. // optimize for quality.
  33. message Document {
  // Identifies which shard of a larger document this is. Large documents may
  // be sharded into several document shards, and each shard carries this
  // message to detail which shard it is.
  message ShardInfo {
    // Zero-based index of this shard.
    int64 shard_index = 1;

    // Total number of shards.
    int64 shard_count = 2;

    // Index, within the overall document global text, of the first character
    // in [Document.text][google.cloud.documentai.v1beta2.Document.text].
    int64 text_offset = 3;
  }
  // Attaches schema information and/or other metadata to segments within a
  // [Document][google.cloud.documentai.v1beta2.Document]. Multiple
  // [Label][google.cloud.documentai.v1beta2.Document.Label]s on a single field
  // can denote different labels, different instances of the same label
  // created at different times, or some combination of both.
  message Label {
    // Provenance of the label.
    oneof source {
      // Set when the label was generated by an AutoML model. Stores the full
      // resource name of that AutoML model.
      //
      // Format:
      // `projects/{project-id}/locations/{location-id}/models/{model-id}`
      string automl_model = 2;
    }

    // Name of the label.
    //
    // When the label is generated from an AutoML Text Classification model,
    // this field represents the name of the category.
    string name = 1;

    // Confidence score for the label assignment, between 0 and 1.
    float confidence = 3;
  }
  // Annotation for common text style attributes. Adheres to CSS conventions
  // as much as possible.
  message Style {
    // A font size together with its unit.
    message FontSize {
      // Font size for the text.
      float size = 1;

      // Unit for the font size, following CSS naming (in, px, pt, etc.).
      string unit = 2;
    }

    // Text anchor indexing into the
    // [Document.text][google.cloud.documentai.v1beta2.Document.text].
    TextAnchor text_anchor = 1;

    // Text color.
    google.type.Color color = 2;

    // Text background color.
    google.type.Color background_color = 3;

    // Font weight. Possible values are normal, bold, bolder, and lighter.
    // https://www.w3schools.com/cssref/pr_font_weight.asp
    string font_weight = 4;

    // Text style. Possible values are normal, italic, and oblique.
    // https://www.w3schools.com/cssref/pr_font_font-style.asp
    string text_style = 5;

    // Text decoration, following the CSS standard:
    // <text-decoration-line> <text-decoration-color> <text-decoration-style>
    // https://www.w3schools.com/cssref/pr_text_text-decoration.asp
    string text_decoration = 6;

    // Font size.
    FontSize font_size = 7;
  }
  // A page in a [Document][google.cloud.documentai.v1beta2.Document].
  message Page {
    // Dimension for the page.
    message Dimension {
      // Page width.
      float width = 1;

      // Page height.
      float height = 2;

      // Dimension unit.
      string unit = 3;
    }

    // Visual element describing a layout unit on a page.
    message Layout {
      // Detected human reading orientation.
      enum Orientation {
        // Unspecified orientation.
        ORIENTATION_UNSPECIFIED = 0;

        // Orientation is aligned with page up.
        PAGE_UP = 1;

        // Orientation is aligned with page right.
        // Turn the head 90 degrees clockwise from upright to read.
        PAGE_RIGHT = 2;

        // Orientation is aligned with page down.
        // Turn the head 180 degrees from upright to read.
        PAGE_DOWN = 3;

        // Orientation is aligned with page left.
        // Turn the head 90 degrees counterclockwise from upright to read.
        PAGE_LEFT = 4;
      }

      // Text anchor indexing into the
      // [Document.text][google.cloud.documentai.v1beta2.Document.text].
      TextAnchor text_anchor = 1;

      // Confidence of the current
      // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] within
      // context of the object this layout is for. e.g. confidence can be for a
      // single token, a table, a visual element, etc. depending on context.
      // Range [0, 1].
      float confidence = 2;

      // The bounding polygon for the
      // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout].
      BoundingPoly bounding_poly = 3;

      // Detected orientation for the
      // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout].
      Orientation orientation = 4;

      // Optional. This is the identifier used by referencing
      // [PageAnchor][google.cloud.documentai.v1beta2.Document.PageAnchor]s.
      string id = 5 [(google.api.field_behavior) = OPTIONAL];
    }

    // A block has a set of lines (collected into paragraphs) that have a
    // common line-spacing and orientation.
    message Block {
      // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for
      // [Block][google.cloud.documentai.v1beta2.Document.Page.Block].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;
    }

    // A collection of lines that a human would perceive as a paragraph.
    message Paragraph {
      // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for
      // [Paragraph][google.cloud.documentai.v1beta2.Document.Page.Paragraph].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;
    }

    // A collection of tokens that a human would perceive as a line.
    // Does not cross column boundaries, can be horizontal, vertical, etc.
    message Line {
      // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for
      // [Line][google.cloud.documentai.v1beta2.Document.Page.Line].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;
    }

    // A detected token.
    message Token {
      // Detected break at the end of a
      // [Token][google.cloud.documentai.v1beta2.Document.Page.Token].
      message DetectedBreak {
        // Enum to denote the type of break found.
        enum Type {
          // Unspecified break type.
          TYPE_UNSPECIFIED = 0;

          // A single whitespace.
          SPACE = 1;

          // A wider whitespace.
          WIDE_SPACE = 2;

          // A hyphen that indicates that a token has been split across lines.
          HYPHEN = 3;
        }

        // Detected break type.
        Type type = 1;
      }

      // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for
      // [Token][google.cloud.documentai.v1beta2.Document.Page.Token].
      Layout layout = 1;

      // Detected break at the end of a
      // [Token][google.cloud.documentai.v1beta2.Document.Page.Token].
      DetectedBreak detected_break = 2;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 3;
    }

    // Detected non-text visual elements e.g. checkbox, signature etc. on the
    // page.
    message VisualElement {
      // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for
      // [VisualElement][google.cloud.documentai.v1beta2.Document.Page.VisualElement].
      Layout layout = 1;

      // Type of the
      // [VisualElement][google.cloud.documentai.v1beta2.Document.Page.VisualElement].
      string type = 2;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 3;
    }

    // A table representation similar to HTML table structure.
    message Table {
      // A row of table cells.
      message TableRow {
        // Cells that make up this row.
        repeated TableCell cells = 1;
      }

      // A cell representation inside the table.
      message TableCell {
        // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for
        // [TableCell][google.cloud.documentai.v1beta2.Document.Page.Table.TableCell].
        Layout layout = 1;

        // How many rows this cell spans.
        int32 row_span = 2;

        // How many columns this cell spans.
        int32 col_span = 3;

        // A list of detected languages together with confidence.
        repeated DetectedLanguage detected_languages = 4;
      }

      // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for
      // [Table][google.cloud.documentai.v1beta2.Document.Page.Table].
      Layout layout = 1;

      // Header rows of the table.
      repeated TableRow header_rows = 2;

      // Body rows of the table.
      repeated TableRow body_rows = 3;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 4;
    }

    // A form field detected on the page.
    message FormField {
      // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for the
      // [FormField][google.cloud.documentai.v1beta2.Document.Page.FormField]
      // name. e.g. `Address`, `Email`, `Grand total`, `Phone number`, etc.
      Layout field_name = 1;

      // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for the
      // [FormField][google.cloud.documentai.v1beta2.Document.Page.FormField]
      // value.
      Layout field_value = 2;

      // A list of detected languages for name together with confidence.
      repeated DetectedLanguage name_detected_languages = 3;

      // A list of detected languages for value together with confidence.
      repeated DetectedLanguage value_detected_languages = 4;

      // If the value is non-textual, this field represents the type. Current
      // valid values are:
      // - blank (this indicates the field_value is normal text)
      // - "unfilled_checkbox"
      // - "filled_checkbox"
      string value_type = 5;

      // An internal field, created for Labeling UI to export key text.
      string corrected_key_text = 6;

      // An internal field, created for Labeling UI to export value text.
      string corrected_value_text = 7;
    }

    // Detected language for a structural component.
    message DetectedLanguage {
      // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
      // information, see
      // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
      string language_code = 1;

      // Confidence of detected language. Range [0, 1].
      float confidence = 2;
    }

    // 1-based index for current
    // [Page][google.cloud.documentai.v1beta2.Document.Page] in a parent
    // [Document][google.cloud.documentai.v1beta2.Document]. Useful when a page
    // is taken out of a [Document][google.cloud.documentai.v1beta2.Document]
    // for individual processing.
    int32 page_number = 1;

    // Physical dimension of the page.
    Dimension dimension = 2;

    // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for the
    // page.
    Layout layout = 3;

    // A list of detected languages together with confidence.
    repeated DetectedLanguage detected_languages = 4;

    // A list of visually detected text blocks on the page.
    // A block has a set of lines (collected into paragraphs) that have a
    // common line-spacing and orientation.
    repeated Block blocks = 5;

    // A list of visually detected text paragraphs on the page.
    // A collection of lines that a human would perceive as a paragraph.
    repeated Paragraph paragraphs = 6;

    // A list of visually detected text lines on the page.
    // A collection of tokens that a human would perceive as a line.
    repeated Line lines = 7;

    // A list of visually detected tokens on the page.
    repeated Token tokens = 8;

    // A list of detected non-text visual elements e.g. checkbox,
    // signature etc. on the page.
    repeated VisualElement visual_elements = 9;

    // A list of visually detected tables on the page.
    repeated Table tables = 10;

    // A list of visually detected form fields on the page.
    repeated FormField form_fields = 11;
  }
  // A phrase in the text that is a known entity type, such as a person, an
  // organization, or location.
  message Entity {
    // Text anchor indexing into the
    // [Document.text][google.cloud.documentai.v1beta2.Document.text].
    TextAnchor text_anchor = 1;

    // Entity type from a schema e.g. `Address`.
    string type = 2;

    // Text value in the document e.g. `1600 Amphitheatre Pkwy`.
    string mention_text = 3;

    // Deprecated. Use the `id` field instead.
    //
    // Marked with the `deprecated` option so generated code flags remaining
    // uses; the field number and wire encoding are unchanged.
    string mention_id = 4 [deprecated = true];

    // Optional. Confidence of detected Schema entity. Range [0, 1].
    float confidence = 5 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Represents the provenance of this entity wrt. the location on
    // the page where it was found.
    PageAnchor page_anchor = 6 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Canonical id. This will be a unique value in the entity list
    // for this document.
    string id = 7 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Temporary field to store the bounding poly for short-term
    // POCs. Used by the frontend only. Do not use before you talk to ybo@ and
    // lukasr@.
    BoundingPoly bounding_poly_for_demo_frontend = 8
        [(google.api.field_behavior) = OPTIONAL];
  }
  // Relationship between
  // [Entities][google.cloud.documentai.v1beta2.Document.Entity].
  message EntityRelation {
    // Subject entity id.
    string subject_id = 1;

    // Object entity id.
    string object_id = 2;

    // Relationship description.
    string relation = 3;
  }
  // Text reference indexing into the
  // [Document.text][google.cloud.documentai.v1beta2.Document.text].
  message TextAnchor {
    // A text segment in the
    // [Document.text][google.cloud.documentai.v1beta2.Document.text]. The
    // indices may be out of bounds, which indicates that the text extends into
    // another document shard for large sharded documents. See
    // [ShardInfo.text_offset][google.cloud.documentai.v1beta2.Document.ShardInfo.text_offset]
    message TextSegment {
      // [TextSegment][google.cloud.documentai.v1beta2.Document.TextAnchor.TextSegment]
      // start UTF-8 char index in the
      // [Document.text][google.cloud.documentai.v1beta2.Document.text].
      int64 start_index = 1;

      // [TextSegment][google.cloud.documentai.v1beta2.Document.TextAnchor.TextSegment]
      // half open end UTF-8 char index in the
      // [Document.text][google.cloud.documentai.v1beta2.Document.text].
      int64 end_index = 2;
    }

    // The text segments from the
    // [Document.text][google.cloud.documentai.v1beta2.Document.text].
    repeated TextSegment text_segments = 1;
  }
  // Referencing elements in
  // [Document.pages][google.cloud.documentai.v1beta2.Document.pages].
  message PageAnchor {
    // Represents a weak reference to a page element within a document.
    message PageRef {
      // The type of layout that is being referenced.
      enum LayoutType {
        // Layout Unspecified.
        LAYOUT_TYPE_UNSPECIFIED = 0;

        // References a
        // [Page.blocks][google.cloud.documentai.v1beta2.Document.Page.blocks]
        // element.
        BLOCK = 1;

        // References a
        // [Page.paragraphs][google.cloud.documentai.v1beta2.Document.Page.paragraphs]
        // element.
        PARAGRAPH = 2;

        // References a
        // [Page.lines][google.cloud.documentai.v1beta2.Document.Page.lines]
        // element.
        LINE = 3;

        // References a
        // [Page.tokens][google.cloud.documentai.v1beta2.Document.Page.tokens]
        // element.
        TOKEN = 4;

        // References a
        // [Page.visual_elements][google.cloud.documentai.v1beta2.Document.Page.visual_elements]
        // element.
        VISUAL_ELEMENT = 5;

        // References a
        // [Page.tables][google.cloud.documentai.v1beta2.Document.Page.tables]
        // element.
        TABLE = 6;

        // References a
        // [Page.form_fields][google.cloud.documentai.v1beta2.Document.Page.form_fields]
        // element.
        FORM_FIELD = 7;
      }

      // Required. Index into the
      // [Document.pages][google.cloud.documentai.v1beta2.Document.pages]
      // element.
      int64 page = 1 [(google.api.field_behavior) = REQUIRED];

      // Optional. The type of the layout element that is being referenced. If
      // not specified the whole page is assumed to be referenced.
      LayoutType layout_type = 2 [(google.api.field_behavior) = OPTIONAL];

      // Optional. The
      // [Page.Layout.id][google.cloud.documentai.v1beta2.Document.Page.Layout.id]
      // on the page that this element references. If
      // [PageRef.layout_type][google.cloud.documentai.v1beta2.Document.PageAnchor.PageRef.layout_type]
      // is specified this id must also be specified.
      string layout_id = 3 [(google.api.field_behavior) = OPTIONAL];
    }

    // One or more references to visual page elements.
    repeated PageRef page_refs = 1;
  }
  // Original source document from the user.
  oneof source {
    // Currently supports Google Cloud Storage URI of the form
    // `gs://bucket_name/object_name`. Object versioning is not supported.
    // See [Google Cloud Storage Request
    // URIs](https://cloud.google.com/storage/docs/reference-uris) for more
    // info.
    string uri = 1;

    // Inline document content, represented as a stream of bytes.
    // Note: As with all `bytes` fields, protobuffers use a pure binary
    // representation, whereas JSON representations use base64.
    bytes content = 2;
  }

  // An IANA published MIME type (also referred to as media type). For more
  // information, see
  // https://www.iana.org/assignments/media-types/media-types.xhtml.
  string mime_type = 3;

  // UTF-8 encoded text in reading order from the document.
  string text = 4;

  // Styles for the
  // [Document.text][google.cloud.documentai.v1beta2.Document.text].
  repeated Style text_styles = 5;

  // Visual page layout for the
  // [Document][google.cloud.documentai.v1beta2.Document].
  repeated Page pages = 6;

  // A list of entities detected on
  // [Document.text][google.cloud.documentai.v1beta2.Document.text]. For
  // document shards, entities in this list may cross shard boundaries.
  repeated Entity entities = 7;

  // Relationship among
  // [Document.entities][google.cloud.documentai.v1beta2.Document.entities].
  repeated EntityRelation entity_relations = 8;

  // Information about the sharding if this document is sharded part of a
  // larger document. If the document is not sharded, this message is not
  // specified.
  ShardInfo shard_info = 9;

  // [Label][google.cloud.documentai.v1beta2.Document.Label]s for this
  // document.
  repeated Label labels = 11;

  // Any error that occurred while processing this document.
  google.rpc.Status error = 10;
  404. }