io.proto

  1. // Copyright 2020 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. syntax = "proto3";
  15. package google.cloud.automl.v1beta1;
  16. import "google/api/annotations.proto";
  17. option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1beta1;automl";
  18. option java_multiple_files = true;
  19. option java_package = "com.google.cloud.automl.v1beta1";
  20. option php_namespace = "Google\\Cloud\\AutoMl\\V1beta1";
  21. option ruby_package = "Google::Cloud::AutoML::V1beta1";
  22. // Input configuration for ImportData Action.
  23. //
  24. // The format of input depends on the dataset_metadata of the Dataset into
  25. // which the import is happening. As input source the
  26. // [gcs_source][google.cloud.automl.v1beta1.InputConfig.gcs_source]
  27. // is expected, unless specified otherwise. Additionally any input .CSV file
  28. // by itself must be 100MB or smaller, unless specified otherwise.
  29. // If an "example" file (that is, image, video etc.) with identical content
  30. // (even if it had different GCS_FILE_PATH) is mentioned multiple times, then
  31. // its label, bounding boxes etc. are appended. The same file should be always
  32. // provided with the same ML_USE and GCS_FILE_PATH, if it is not, then
  33. // these values are nondeterministically selected from the given ones.
  34. //
  35. // The formats are represented in EBNF with commas being literal and with
  36. // non-terminal symbols defined near the end of this comment. The formats are:
  37. //
  38. // * For Image Classification:
  39. // CSV file(s) with each line in format:
  40. // ML_USE,GCS_FILE_PATH,LABEL,LABEL,...
  41. // GCS_FILE_PATH leads to image of up to 30MB in size. Supported
  42. // extensions: .JPEG, .GIF, .PNG, .WEBP, .BMP, .TIFF, .ICO
  43. // For MULTICLASS classification type, at most one LABEL is allowed
  44. // per image. If an image has not yet been labeled, then it should be
  45. // mentioned just once with no LABEL.
  46. // Some sample rows:
  47. // TRAIN,gs://folder/image1.jpg,daisy
  48. // TEST,gs://folder/image2.jpg,dandelion,tulip,rose
  49. // UNASSIGNED,gs://folder/image3.jpg,daisy
  50. // UNASSIGNED,gs://folder/image4.jpg
  51. //
  52. // * For Image Object Detection:
  53. // CSV file(s) with each line in format:
  54. // ML_USE,GCS_FILE_PATH,(LABEL,BOUNDING_BOX | ,,,,,,,)
  55. // GCS_FILE_PATH leads to image of up to 30MB in size. Supported
  56. // extensions: .JPEG, .GIF, .PNG.
  57. // Each image is assumed to be exhaustively labeled. The minimum
  58. // allowed BOUNDING_BOX edge length is 0.01, and no more than 500
  59. // BOUNDING_BOX-es per image are allowed (one BOUNDING_BOX is defined
  60. // per line). If an image has not yet been labeled, then it should be
  61. // mentioned just once with no LABEL and the ",,,,,,," in place of the
  62. // BOUNDING_BOX. Images which are known to not contain any
  63. // bounding boxes should be labeled explicitly as
  64. // "NEGATIVE_IMAGE", followed by ",,,,,,," in place of the
  65. // BOUNDING_BOX.
  66. // Sample rows:
  67. // TRAIN,gs://folder/image1.png,car,0.1,0.1,,,0.3,0.3,,
  68. // TRAIN,gs://folder/image1.png,bike,.7,.6,,,.8,.9,,
  69. // UNASSIGNED,gs://folder/im2.png,car,0.1,0.1,0.2,0.1,0.2,0.3,0.1,0.3
  70. // TEST,gs://folder/im3.png,,,,,,,,,
  71. // TRAIN,gs://folder/im4.png,NEGATIVE_IMAGE,,,,,,,,,
  72. //
  73. // * For Video Classification:
  74. // CSV file(s) with each line in format:
  75. // ML_USE,GCS_FILE_PATH
  76. // where ML_USE VALIDATE value should not be used. The GCS_FILE_PATH
  77. // should lead to another .csv file which describes examples that have
  78. // given ML_USE, using the following row format:
  79. // GCS_FILE_PATH,(LABEL,TIME_SEGMENT_START,TIME_SEGMENT_END | ,,)
  80. // Here GCS_FILE_PATH leads to a video of up to 50GB in size and up
  81. // to 3h duration. Supported extensions: .MOV, .MPEG4, .MP4, .AVI.
  82. // TIME_SEGMENT_START and TIME_SEGMENT_END must be within the
  83. // length of the video, and end has to be after the start. Any segment
  84. // of a video which has one or more labels on it, is considered a
  85. // hard negative for all other labels. Any segment with no labels on
  86. // it is considered to be unknown. If a whole video is unknown, then
  87. // it should be mentioned just once with ",," in place of LABEL,
  88. // TIME_SEGMENT_START,TIME_SEGMENT_END.
  89. // Sample top level CSV file:
  90. // TRAIN,gs://folder/train_videos.csv
  91. // TEST,gs://folder/test_videos.csv
  92. // UNASSIGNED,gs://folder/other_videos.csv
  93. // Sample rows of a CSV file for a particular ML_USE:
  94. // gs://folder/video1.avi,car,120,180.000021
  95. // gs://folder/video1.avi,bike,150,180.000021
  96. // gs://folder/vid2.avi,car,0,60.5
  97. // gs://folder/vid3.avi,,,
  98. //
  99. // * For Video Object Tracking:
  100. // CSV file(s) with each line in format:
  101. // ML_USE,GCS_FILE_PATH
  102. // where ML_USE VALIDATE value should not be used. The GCS_FILE_PATH
  103. // should lead to another .csv file which describes examples that have
  104. // given ML_USE, using one of the following row formats:
  105. // GCS_FILE_PATH,LABEL,[INSTANCE_ID],TIMESTAMP,BOUNDING_BOX
  106. // or
  107. // GCS_FILE_PATH,,,,,,,,,,
  108. // Here GCS_FILE_PATH leads to a video of up to 50GB in size and up
  109. // to 3h duration. Supported extensions: .MOV, .MPEG4, .MP4, .AVI.
  110. // Providing INSTANCE_IDs can help to obtain a better model. When
  111. // a specific labeled entity leaves the video frame, and shows up
  112. // afterwards it is not required, albeit preferable, that the same
  113. // INSTANCE_ID is given to it.
  114. // TIMESTAMP must be within the length of the video, the
  115. // BOUNDING_BOX is assumed to be drawn on the closest video's frame
  116. // to the TIMESTAMP. Any frame mentioned by a TIMESTAMP is expected
  117. // to be exhaustively labeled, and no more than 500 BOUNDING_BOX-es per
  118. // frame are allowed. If a whole video is unknown, then it should be
  119. // mentioned just once with ",,,,,,,,,," in place of LABEL,
  120. // [INSTANCE_ID],TIMESTAMP,BOUNDING_BOX.
  121. // Sample top level CSV file:
  122. // TRAIN,gs://folder/train_videos.csv
  123. // TEST,gs://folder/test_videos.csv
  124. // UNASSIGNED,gs://folder/other_videos.csv
  125. // Seven sample rows of a CSV file for a particular ML_USE:
  126. // gs://folder/video1.avi,car,1,12.10,0.8,0.8,0.9,0.8,0.9,0.9,0.8,0.9
  127. // gs://folder/video1.avi,car,1,12.90,0.4,0.8,0.5,0.8,0.5,0.9,0.4,0.9
  128. // gs://folder/video1.avi,car,2,12.10,.4,.2,.5,.2,.5,.3,.4,.3
  129. // gs://folder/video1.avi,car,2,12.90,.8,.2,,,.9,.3,,
  130. // gs://folder/video1.avi,bike,,12.50,.45,.45,,,.55,.55,,
  131. // gs://folder/video2.avi,car,1,0,.1,.9,,,.9,.1,,
  132. // gs://folder/video2.avi,,,,,,,,,,,
  133. // * For Text Extraction:
  134. // CSV file(s) with each line in format:
  135. // ML_USE,GCS_FILE_PATH
  136. // GCS_FILE_PATH leads to a .JSONL (that is, JSON Lines) file which
  137. // either imports text in-line or as documents. Any given
  138. // .JSONL file must be 100MB or smaller.
  139. // The in-line .JSONL file contains, per line, a proto that wraps a
  140. // TextSnippet proto (in json representation) followed by one or more
  141. // AnnotationPayload protos (called annotations), which have
  142. // display_name and text_extraction detail populated. The given text
  143. // is expected to be annotated exhaustively, for example, if you look
  144. // for animals and text contains "dolphin" that is not labeled, then
  145. // "dolphin" is assumed to not be an animal. Any given text snippet
  146. // content must be 10KB or smaller, and also be UTF-8 NFC encoded
  147. // (ASCII already is).
  148. // The document .JSONL file contains, per line, a proto that wraps a
  149. // Document proto. The Document proto must have either document_text
  150. // or input_config set. In document_text case, the Document proto may
  151. // also contain the spatial information of the document, including
  152. // layout, document dimension and page number. In input_config case,
  153. // only PDF documents are supported now, and each document may be up
  154. // to 2MB large. Currently, annotations on documents cannot be
  155. // specified at import.
  156. // Three sample CSV rows:
  157. // TRAIN,gs://folder/file1.jsonl
  158. // VALIDATE,gs://folder/file2.jsonl
  159. // TEST,gs://folder/file3.jsonl
  160. // Sample in-line JSON Lines file for entity extraction (presented here
  161. // with artificial line breaks, but the only actual line break is
  162. // denoted by \n):
  163. // {
  164. // "document": {
  165. // "document_text": {"content": "dog cat"}
  166. // "layout": [
  167. // {
  168. // "text_segment": {
  169. // "start_offset": 0,
  170. // "end_offset": 3,
  171. // },
  172. // "page_number": 1,
  173. // "bounding_poly": {
  174. // "normalized_vertices": [
  175. // {"x": 0.1, "y": 0.1},
  176. // {"x": 0.1, "y": 0.3},
  177. // {"x": 0.3, "y": 0.3},
  178. // {"x": 0.3, "y": 0.1},
  179. // ],
  180. // },
  181. // "text_segment_type": TOKEN,
  182. // },
  183. // {
  184. // "text_segment": {
  185. // "start_offset": 4,
  186. // "end_offset": 7,
  187. // },
  188. // "page_number": 1,
  189. // "bounding_poly": {
  190. // "normalized_vertices": [
  191. // {"x": 0.4, "y": 0.1},
  192. // {"x": 0.4, "y": 0.3},
  193. // {"x": 0.8, "y": 0.3},
  194. // {"x": 0.8, "y": 0.1},
  195. // ],
  196. // },
  197. // "text_segment_type": TOKEN,
  198. // }
  199. //
  200. // ],
  201. // "document_dimensions": {
  202. // "width": 8.27,
  203. // "height": 11.69,
  204. // "unit": INCH,
  205. // }
  206. // "page_count": 1,
  207. // },
  208. // "annotations": [
  209. // {
  210. // "display_name": "animal",
  211. // "text_extraction": {"text_segment": {"start_offset": 0,
  212. // "end_offset": 3}}
  213. // },
  214. // {
  215. // "display_name": "animal",
  216. // "text_extraction": {"text_segment": {"start_offset": 4,
  217. // "end_offset": 7}}
  218. // }
  219. // ],
  220. // }\n
  221. // {
  222. // "text_snippet": {
  223. // "content": "This dog is good."
  224. // },
  225. // "annotations": [
  226. // {
  227. // "display_name": "animal",
  228. // "text_extraction": {
  229. // "text_segment": {"start_offset": 5, "end_offset": 8}
  230. // }
  231. // }
  232. // ]
  233. // }
  234. // Sample document JSON Lines file (presented here with artificial line
  235. // breaks, but the only actual line break is denoted by \n):
  236. // {
  237. // "document": {
  238. // "input_config": {
  239. // "gcs_source": { "input_uris": [ "gs://folder/document1.pdf" ]
  240. // }
  241. // }
  242. // }
  243. // }\n
  244. // {
  245. // "document": {
  246. // "input_config": {
  247. // "gcs_source": { "input_uris": [ "gs://folder/document2.pdf" ]
  248. // }
  249. // }
  250. // }
  251. // }
  252. //
  253. // * For Text Classification:
  254. // CSV file(s) with each line in format:
  255. // ML_USE,(TEXT_SNIPPET | GCS_FILE_PATH),LABEL,LABEL,...
  256. // TEXT_SNIPPET and GCS_FILE_PATH are distinguished by a pattern. If
  257. // the column content is a valid GCS file path, i.e. prefixed by
  258. // "gs://", it will be treated as a GCS_FILE_PATH; otherwise, if the
  259. // content is enclosed within double quotes (""), it is
  260. // treated as a TEXT_SNIPPET. In the GCS_FILE_PATH case, the path
  261. // must lead to a .txt file with UTF-8 encoding, for example,
  262. // "gs://folder/content.txt", and the content in it is extracted
  263. // as a text snippet. In the TEXT_SNIPPET case, the column content
  264. // excluding quotes is treated as the text snippet to be imported. In
  265. // both cases, the text snippet/file size must be within 128kB.
  266. // At most 100 unique labels are allowed per CSV row.
  267. // Sample rows:
  268. // TRAIN,"They have bad food and very rude",RudeService,BadFood
  269. // TRAIN,gs://folder/content.txt,SlowService
  270. // TEST,"Typically always bad service there.",RudeService
  271. // VALIDATE,"Stomach ache to go.",BadFood
  272. //
  273. // * For Text Sentiment:
  274. // CSV file(s) with each line in format:
  275. // ML_USE,(TEXT_SNIPPET | GCS_FILE_PATH),SENTIMENT
  276. // TEXT_SNIPPET and GCS_FILE_PATH are distinguished by a pattern. If
  277. // the column content is a valid GCS file path, that is, prefixed by
  278. // "gs://", it is treated as a GCS_FILE_PATH, otherwise it is treated
  279. // as a TEXT_SNIPPET. In the GCS_FILE_PATH case, the path
  280. // must lead to a .txt file with UTF-8 encoding, for example,
  281. // "gs://folder/content.txt", and the content in it is extracted
  282. // as a text snippet. In the TEXT_SNIPPET case, the column content itself
  283. // is treated as the text snippet to be imported. In both cases, the
  284. // text snippet must be up to 500 characters long.
  285. // Sample rows:
  286. // TRAIN,"@freewrytin this is way too good for your product",2
  287. // TRAIN,"I need this product so bad",3
  288. // TEST,"Thank you for this product.",4
  289. // VALIDATE,gs://folder/content.txt,2
  290. //
  291. // * For Tables:
  292. // Either
  293. // [gcs_source][google.cloud.automl.v1beta1.InputConfig.gcs_source] or
  294. //
  295. // [bigquery_source][google.cloud.automl.v1beta1.InputConfig.bigquery_source]
  296. // can be used. All input is concatenated into a single
  297. //
  298. // [primary_table][google.cloud.automl.v1beta1.TablesDatasetMetadata.primary_table_name]
  299. // For gcs_source:
  300. // CSV file(s), where the first row of the first file is the header,
  301. // containing unique column names. If the first row of a subsequent
  302. // file is the same as the header, then it is also treated as a
  303. // header. All other rows contain values for the corresponding
  304. // columns.
  305. // Each .CSV file by itself must be 10GB or smaller, and their total
  306. // size must be 100GB or smaller.
  307. // First three sample rows of a CSV file:
  308. // "Id","First Name","Last Name","Dob","Addresses"
  309. //
  310. // "1","John","Doe","1968-01-22","[{"status":"current","address":"123_First_Avenue","city":"Seattle","state":"WA","zip":"11111","numberOfYears":"1"},{"status":"previous","address":"456_Main_Street","city":"Portland","state":"OR","zip":"22222","numberOfYears":"5"}]"
  311. //
  312. // "2","Jane","Doe","1980-10-16","[{"status":"current","address":"789_Any_Avenue","city":"Albany","state":"NY","zip":"33333","numberOfYears":"2"},{"status":"previous","address":"321_Main_Street","city":"Hoboken","state":"NJ","zip":"44444","numberOfYears":"3"}]}
  313. // For bigquery_source:
  314. // A URI of a BigQuery table. The user data size of the BigQuery
  315. // table must be 100GB or smaller.
  316. // An imported table must have between 2 and 1,000 columns, inclusive,
  317. // and between 1,000 and 100,000,000 rows, inclusive. At most 5 import
  318. // data operations can run in parallel.
  319. // Definitions:
  320. // ML_USE = "TRAIN" | "VALIDATE" | "TEST" | "UNASSIGNED"
  321. // Describes how the given example (file) should be used for model
  322. // training. "UNASSIGNED" can be used when the user has no preference.
  323. // GCS_FILE_PATH = A path to file on GCS, e.g. "gs://folder/image1.png".
  324. // LABEL = A display name of an object on an image, video etc., e.g. "dog".
  325. // Must be up to 32 characters long and can consist only of ASCII
  326. // Latin letters A-Z and a-z, underscores(_), and ASCII digits 0-9.
  327. // For each label an AnnotationSpec is created, whose display_name
  328. // becomes the label; AnnotationSpecs are given back in predictions.
  329. // INSTANCE_ID = A positive integer that identifies a specific instance of a
  330. // labeled entity on an example. Used e.g. to track two cars on
  331. // a video while being able to tell apart which one is which.
  332. // BOUNDING_BOX = VERTEX,VERTEX,VERTEX,VERTEX | VERTEX,,,VERTEX,,
  333. // A rectangle parallel to the frame of the example (image,
  334. // video). If 4 vertices are given, they are connected by edges
  335. // in the order provided; if 2 are given, they are recognized
  336. // as diagonally opposite vertices of the rectangle.
  337. // VERTEX = COORDINATE,COORDINATE
  338. // First coordinate is horizontal (x), the second is vertical (y).
  339. // COORDINATE = A float in 0 to 1 range, relative to total length of
  340. // image or video in given dimension. For fractions the
  341. // leading non-decimal 0 can be omitted (i.e. 0.3 = .3).
  342. // Point 0,0 is in top left.
  343. // TIME_SEGMENT_START = TIME_OFFSET
  344. // Expresses a beginning, inclusive, of a time segment
  345. // within an example that has a time dimension
  346. // (e.g. video).
  347. // TIME_SEGMENT_END = TIME_OFFSET
  348. // Expresses an end, exclusive, of a time segment within
  349. // an example that has a time dimension (e.g. video).
  350. // TIME_OFFSET = A number of seconds as measured from the start of an
  351. // example (e.g. video). Fractions are allowed, up to a
  352. // microsecond precision. "inf" is allowed, and it means the end
  353. // of the example.
  354. // TEXT_SNIPPET = A content of a text snippet, UTF-8 encoded, enclosed within
  355. // double quotes ("").
  356. // SENTIMENT = An integer between 0 and
  357. // Dataset.text_sentiment_dataset_metadata.sentiment_max
  358. // (inclusive). Describes the ordinal of the sentiment - higher
  359. // value means a more positive sentiment. All the values are
  360. // completely relative, i.e. neither does 0 need to mean a negative or
  361. // neutral sentiment nor does sentiment_max need to mean a positive one
  362. // - it is just required that 0 is the least positive sentiment
  363. // in the data, and sentiment_max is the most positive one.
  364. // The SENTIMENT shouldn't be confused with "score" or "magnitude"
  365. // from the previous Natural Language Sentiment Analysis API.
  366. // All SENTIMENT values between 0 and sentiment_max must be
  367. // represented in the imported data. On prediction the same 0 to
  368. // sentiment_max range will be used. The difference between
  369. // neighboring sentiment values need not be uniform, e.g. 1 and
  370. // 2 may be similar whereas the difference between 2 and 3 may be
  371. // huge.
  372. //
  373. // Errors:
  374. // If any of the provided CSV files can't be parsed or if more than a
  375. // certain percentage of CSV rows cannot be processed, then the operation
  376. // fails and nothing is imported. Regardless of overall success or failure,
  377. // the per-row failures, up to a certain count cap, are listed in
  378. // Operation.metadata.partial_failures.
  379. //
  380. message InputConfig {
  381. // The source of the input.
  382. oneof source {
  383. // The Google Cloud Storage location for the input content.
  384. // In ImportData, the gcs_source points to a CSV file with structure described in
  385. // the comment.
  386. GcsSource gcs_source = 1;
  387. // The BigQuery location for the input content.
  388. BigQuerySource bigquery_source = 3;
  389. }
  390. // Additional domain-specific parameters describing the semantics of the
  391. // imported data; any string must be up to 25000
  392. // characters long.
  393. //
  394. // * For Tables:
  395. // `schema_inference_version` - (integer) Required. The version of the
  396. // algorithm that should be used for the initial inference of the
  397. // schema (columns' DataTypes) of the table the data is being imported
  398. // into. Allowed values: "1".
  399. map<string, string> params = 2;
  400. }
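
A minimal Python sketch (illustration only, not part of this file) of producing an ImportData CSV in the Image Object Detection format described in the comment above. The file names, labels, pixel boxes, and image sizes are made up; the script only shows how pixel boxes map to the normalized BOUNDING_BOX fields.

    import csv

    # Hypothetical source data: (ml_use, gcs_path, label, pixel box, image size).
    examples = [
        ("TRAIN", "gs://folder/image1.png", "car", (20, 20, 60, 60), (200, 200)),
        ("UNASSIGNED", "gs://folder/im2.png", "car", (20, 20, 40, 60), (200, 200)),
    ]

    with open("import.csv", "w", newline="") as f:
        writer = csv.writer(f)
        for ml_use, path, label, (x0, y0, x1, y1), (w, h) in examples:
            # BOUNDING_BOX given as two diagonally opposite VERTEXes
            # (the VERTEX,,,VERTEX,, form), COORDINATEs normalized into 0..1.
            writer.writerow([
                ml_use, path, label,
                round(x0 / w, 3), round(y0 / h, 3), "", "",
                round(x1 / w, 3), round(y1 / h, 3), "", "",
            ])
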
  401. // Input configuration for BatchPredict Action.
  402. //
  403. // The format of input depends on the ML problem of the model used for
  404. // prediction. As input source the
  405. // [gcs_source][google.cloud.automl.v1beta1.InputConfig.gcs_source]
  406. // is expected, unless specified otherwise.
  407. //
  408. // The formats are represented in EBNF with commas being literal and with
  409. // non-terminal symbols defined near the end of this comment. The formats
  410. // are:
  411. //
  412. // * For Image Classification:
  413. // CSV file(s) with each line having just a single column:
  414. // GCS_FILE_PATH
  415. // which leads to image of up to 30MB in size. Supported
  416. // extensions: .JPEG, .GIF, .PNG. This path is treated as the ID in
  417. // the Batch predict output.
  418. // Three sample rows:
  419. // gs://folder/image1.jpeg
  420. // gs://folder/image2.gif
  421. // gs://folder/image3.png
  422. //
  423. // * For Image Object Detection:
  424. // CSV file(s) with each line having just a single column:
  425. // GCS_FILE_PATH
  426. // which leads to image of up to 30MB in size. Supported
  427. // extensions: .JPEG, .GIF, .PNG. This path is treated as the ID in
  428. // the Batch predict output.
  429. // Three sample rows:
  430. // gs://folder/image1.jpeg
  431. // gs://folder/image2.gif
  432. // gs://folder/image3.png
  433. // * For Video Classification:
  434. // CSV file(s) with each line in format:
  435. // GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END
  436. // GCS_FILE_PATH leads to video of up to 50GB in size and up to 3h
  437. // duration. Supported extensions: .MOV, .MPEG4, .MP4, .AVI.
  438. // TIME_SEGMENT_START and TIME_SEGMENT_END must be within the
  439. // length of the video, and end has to be after the start.
  440. // Three sample rows:
  441. // gs://folder/video1.mp4,10,40
  442. // gs://folder/video1.mp4,20,60
  443. // gs://folder/vid2.mov,0,inf
  444. //
  445. // * For Video Object Tracking:
  446. // CSV file(s) with each line in format:
  447. // GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END
  448. // GCS_FILE_PATH leads to video of up to 50GB in size and up to 3h
  449. // duration. Supported extensions: .MOV, .MPEG4, .MP4, .AVI.
  450. // TIME_SEGMENT_START and TIME_SEGMENT_END must be within the
  451. // length of the video, and end has to be after the start.
  452. // Three sample rows:
  453. // gs://folder/video1.mp4,10,240
  454. // gs://folder/video1.mp4,300,360
  455. // gs://folder/vid2.mov,0,inf
  456. // * For Text Classification:
  457. // CSV file(s) with each line having just a single column:
  458. // GCS_FILE_PATH | TEXT_SNIPPET
  459. // Any given text file can be up to 128kB in size.
  460. // Any given text snippet content must have 60,000 characters or less.
  461. // Three sample rows:
  462. // gs://folder/text1.txt
  463. // "Some text content to predict"
  464. // gs://folder/text3.pdf
  465. // Supported file extensions: .txt, .pdf
  466. //
  467. // * For Text Sentiment:
  468. // CSV file(s) with each line having just a single column:
  469. // GCS_FILE_PATH | TEXT_SNIPPET
  470. // Any given text file can be up to 128kB in size.
  471. // Any given text snippet content must have 500 characters or less.
  472. // Three sample rows:
  473. // gs://folder/text1.txt
  474. // "Some text content to predict"
  475. // gs://folder/text3.pdf
  476. // Supported file extensions: .txt, .pdf
  477. //
  478. // * For Text Extraction
  479. // .JSONL (i.e. JSON Lines) file(s) which either provide text in-line or
  480. // as documents (for a single BatchPredict call only one of these
  481. // formats may be used).
  482. // The in-line .JSONL file(s) contain per line a proto that
  483. // wraps a temporary user-assigned TextSnippet ID (string up to 2000
  484. // characters long) called "id", a TextSnippet proto (in
  485. // json representation) and zero or more TextFeature protos. Any given
  486. // text snippet content must have 30,000 characters or less, and also
  487. // be UTF-8 NFC encoded (ASCII already is). The IDs provided should be
  488. // unique.
  489. // The document .JSONL file(s) contain, per line, a proto that wraps a
  490. // Document proto with input_config set. Only PDF documents are
  491. // supported now, and each document must be up to 2MB large.
  492. // Any given .JSONL file must be 100MB or smaller, and no more than 20
  493. // files may be given.
  494. // Sample in-line JSON Lines file (presented here with artificial line
  495. // breaks, but the only actual line break is denoted by \n):
  496. // {
  497. // "id": "my_first_id",
  498. // "text_snippet": { "content": "dog car cat"},
  499. // "text_features": [
  500. // {
  501. // "text_segment": {"start_offset": 4, "end_offset": 6},
  502. // "structural_type": PARAGRAPH,
  503. // "bounding_poly": {
  504. // "normalized_vertices": [
  505. // {"x": 0.1, "y": 0.1},
  506. // {"x": 0.1, "y": 0.3},
  507. // {"x": 0.3, "y": 0.3},
  508. // {"x": 0.3, "y": 0.1},
  509. // ]
  510. // },
  511. // }
  512. // ],
  513. // }\n
  514. // {
  515. // "id": "2",
  516. // "text_snippet": {
  517. // "content": "An elaborate content",
  518. // "mime_type": "text/plain"
  519. // }
  520. // }
  521. // Sample document JSON Lines file (presented here with artificial line
  522. // breaks, but the only actual line break is denoted by \n):
  523. // {
  524. // "document": {
  525. // "input_config": {
  526. // "gcs_source": { "input_uris": [ "gs://folder/document1.pdf" ]
  527. // }
  528. // }
  529. // }
  530. // }\n
  531. // {
  532. // "document": {
  533. // "input_config": {
  534. // "gcs_source": { "input_uris": [ "gs://folder/document2.pdf" ]
  535. // }
  536. // }
  537. // }
  538. // }
  539. //
  540. // * For Tables:
  541. // Either
  542. // [gcs_source][google.cloud.automl.v1beta1.InputConfig.gcs_source] or
  543. //
  544. // [bigquery_source][google.cloud.automl.v1beta1.InputConfig.bigquery_source].
  545. // GCS case:
  546. // CSV file(s), each by itself 10GB or smaller and total size must be
  547. // 100GB or smaller, where first file must have a header containing
  548. // column names. If the first row of a subsequent file is the same as
  549. // the header, then it is also treated as a header. All other rows
  550. // contain values for the corresponding columns.
  551. // The column names must contain the model's
  552. //
  553. // [input_feature_column_specs'][google.cloud.automl.v1beta1.TablesModelMetadata.input_feature_column_specs]
  554. //
  555. // [display_name-s][google.cloud.automl.v1beta1.ColumnSpec.display_name]
  556. // (order doesn't matter). The columns corresponding to the model's
  557. // input feature column specs must contain values compatible with the
  558. // column spec's data types. Prediction on all the rows, i.e. the CSV
  559. // lines, will be attempted. For FORECASTING
  560. //
  561. // [prediction_type][google.cloud.automl.v1beta1.TablesModelMetadata.prediction_type]:
  562. // all columns having
  563. //
  564. // [TIME_SERIES_AVAILABLE_PAST_ONLY][google.cloud.automl.v1beta1.ColumnSpec.ForecastingMetadata.ColumnType]
  565. // type will be ignored.
  566. // First three sample rows of a CSV file:
  567. // "First Name","Last Name","Dob","Addresses"
  568. //
  569. // "John","Doe","1968-01-22","[{"status":"current","address":"123_First_Avenue","city":"Seattle","state":"WA","zip":"11111","numberOfYears":"1"},{"status":"previous","address":"456_Main_Street","city":"Portland","state":"OR","zip":"22222","numberOfYears":"5"}]"
  570. //
  571. // "Jane","Doe","1980-10-16","[{"status":"current","address":"789_Any_Avenue","city":"Albany","state":"NY","zip":"33333","numberOfYears":"2"},{"status":"previous","address":"321_Main_Street","city":"Hoboken","state":"NJ","zip":"44444","numberOfYears":"3"}]}
  572. // BigQuery case:
  573. // A URI of a BigQuery table. The user data size of the BigQuery
  574. // table must be 100GB or smaller.
  575. // The column names must contain the model's
  576. //
  577. // [input_feature_column_specs'][google.cloud.automl.v1beta1.TablesModelMetadata.input_feature_column_specs]
  578. //
  579. // [display_name-s][google.cloud.automl.v1beta1.ColumnSpec.display_name]
  580. // (order doesn't matter). The columns corresponding to the model's
  581. // input feature column specs must contain values compatible with the
  582. // column spec's data types. Prediction on all the rows of the table
  583. // will be attempted. For FORECASTING
  584. //
  585. // [prediction_type][google.cloud.automl.v1beta1.TablesModelMetadata.prediction_type]:
  586. // all columns having
  587. //
  588. // [TIME_SERIES_AVAILABLE_PAST_ONLY][google.cloud.automl.v1beta1.ColumnSpec.ForecastingMetadata.ColumnType]
  589. // type will be ignored.
  590. //
  591. // Definitions:
  592. // GCS_FILE_PATH = A path to file on GCS, e.g. "gs://folder/video.avi".
  593. // TEXT_SNIPPET = A content of a text snippet, UTF-8 encoded, enclosed within
  594. // double quotes ("")
  595. // TIME_SEGMENT_START = TIME_OFFSET
  596. // Expresses a beginning, inclusive, of a time segment
  597. // within an
  598. // example that has a time dimension (e.g. video).
  599. // TIME_SEGMENT_END = TIME_OFFSET
  600. // Expresses an end, exclusive, of a time segment within
  601. // an example that has a time dimension (e.g. video).
  602. // TIME_OFFSET = A number of seconds as measured from the start of an
  603. // example (e.g. video). Fractions are allowed, up to a
  604. // microsecond precision. "inf" is allowed and it means the end
  605. // of the example.
  606. //
  607. // Errors:
  608. // If any of the provided CSV files can't be parsed or if more than a certain
  609. // percentage of CSV rows cannot be processed, then the operation fails and
  610. // prediction does not happen. Regardless of overall success or failure, the
  611. // per-row failures, up to a certain count cap, will be listed in
  612. // Operation.metadata.partial_failures.
  613. message BatchPredictInputConfig {
  614. // Required. The source of the input.
  615. oneof source {
  616. // The Google Cloud Storage location for the input content.
  617. GcsSource gcs_source = 1;
  618. // The BigQuery location for the input content.
  619. BigQuerySource bigquery_source = 2;
  620. }
  621. }
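
A minimal Python sketch (illustration only, not part of this file) that writes the in-line text-extraction JSON Lines input described above for BatchPredict. The snippet IDs and contents are made up, and `batch_predict_input.jsonl` is an arbitrary local file name.

    import json

    # Hypothetical snippets to predict on; IDs must be unique per the format above.
    snippets = {
        "my_first_id": "dog car cat",
        "2": "An elaborate content",
    }

    # One JSON object per line, wrapping "id" and a TextSnippet in JSON form.
    with open("batch_predict_input.jsonl", "w", encoding="utf-8") as f:
        for snippet_id, content in snippets.items():
            record = {
                "id": snippet_id,
                "text_snippet": {"content": content, "mime_type": "text/plain"},
            }
            f.write(json.dumps(record) + "\n")
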
  622. // Input configuration of a [Document][google.cloud.automl.v1beta1.Document].
  623. message DocumentInputConfig {
  624. // The Google Cloud Storage location of the document file. Only a single path
  625. // should be given.
  626. // Max supported size: 512MB.
  627. // Supported extensions: .PDF.
  628. GcsSource gcs_source = 1;
  629. }
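
A minimal Python sketch (illustration only) that writes the document JSON Lines input shown in the samples above: one Document proto with input_config.gcs_source per line. The PDF URIs are placeholders.

    import json

    # Hypothetical PDF locations; each line wraps a Document proto with input_config set.
    pdf_uris = ["gs://folder/document1.pdf", "gs://folder/document2.pdf"]

    with open("documents.jsonl", "w", encoding="utf-8") as f:
        for uri in pdf_uris:
            line = {"document": {"input_config": {"gcs_source": {"input_uris": [uri]}}}}
            f.write(json.dumps(line) + "\n")
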
  630. // * For Translation:
  631. // CSV file `translation.csv`, with each line in format:
  632. // ML_USE,GCS_FILE_PATH
  633. // GCS_FILE_PATH leads to a .TSV file which describes examples that have
  634. // given ML_USE, using the following row format per line:
  635. // TEXT_SNIPPET (in source language) \t TEXT_SNIPPET (in target
  636. // language)
  637. //
  638. // * For Tables:
  639. // Output depends on whether the dataset was imported from GCS or
  640. // BigQuery.
  641. // GCS case:
  642. //
  643. // [gcs_destination][google.cloud.automl.v1beta1.OutputConfig.gcs_destination]
  644. // must be set. Exported are CSV file(s) `tables_1.csv`,
  645. // `tables_2.csv`,...,`tables_N.csv`, each having the table's column
  646. // names as the header line and all other lines containing values for
  647. // the header columns.
  648. // BigQuery case:
  649. //
  650. // [bigquery_destination][google.cloud.automl.v1beta1.OutputConfig.bigquery_destination]
  651. // pointing to a BigQuery project must be set. In the given project a
  652. // new dataset will be created with name
  653. //
  654. // `export_data_<automl-dataset-display-name>_<timestamp-of-export-call>`
  655. // where <automl-dataset-display-name> will be made
  656. // BigQuery-dataset-name compatible (e.g. most special characters will
  657. // become underscores), and timestamp will be in
  658. // YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In that
  659. // dataset a new table called `primary_table` will be created, and
  660. // filled with precisely the same data as that obtained on import.
  661. message OutputConfig {
  662. // Required. The destination of the output.
  663. oneof destination {
  664. // The Google Cloud Storage location where the output is to be written to.
  665. // For Image Object Detection, Text Extraction, Video Classification and
  666. // Tables, in the given directory a new directory will be created with name:
  667. // export_data-<dataset-display-name>-<timestamp-of-export-call> where
  668. // timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. All export
  669. // output will be written into that directory.
  670. GcsDestination gcs_destination = 1;
  671. // The BigQuery location where the output is to be written to.
  672. BigQueryDestination bigquery_destination = 2;
  673. }
  674. }
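
A minimal Python sketch (illustration only) that approximates the `export_data_<automl-dataset-display-name>_<timestamp-of-export-call>` BigQuery dataset name described above. The exact sanitization rule is an assumption beyond "most special characters will become underscores".

    import re
    from datetime import datetime, timezone

    def export_dataset_name(display_name: str, now: datetime) -> str:
        """Approximate the BigQuery dataset name used for ExportData output.

        The replacement rule below is an assumption; the comment above only
        says most special characters become underscores.
        """
        safe_name = re.sub(r"[^0-9A-Za-z_]", "_", display_name)
        # Timestamp in the documented YYYY_MM_DDThh_mm_ss_sssZ form (milliseconds).
        stamp = now.strftime("%Y_%m_%dT%H_%M_%S_") + f"{now.microsecond // 1000:03d}Z"
        return f"export_data_{safe_name}_{stamp}"

    print(export_dataset_name("flowers dataset", datetime.now(timezone.utc)))
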
  675. // Output configuration for BatchPredict Action.
  676. //
  677. // As destination the
  678. //
  679. // [gcs_destination][google.cloud.automl.v1beta1.BatchPredictOutputConfig.gcs_destination]
  680. // must be set unless specified otherwise for a domain. If gcs_destination is
  681. // set then in the given directory a new directory is created. Its name
  682. // will be
  683. // "prediction-<model-display-name>-<timestamp-of-prediction-call>",
  684. // where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Its
  685. // contents depend on the ML problem the predictions are made for.
  686. //
  687. // * For Image Classification:
  688. // In the created directory files `image_classification_1.jsonl`,
  689. // `image_classification_2.jsonl`,...,`image_classification_N.jsonl`
  690. // will be created, where N may be 1, and depends on the
  691. // total number of the successfully predicted images and annotations.
  692. // A single image will be listed only once with all its annotations,
  693. // and its annotations will never be split across files.
  694. // Each .JSONL file will contain, per line, a JSON representation of a
  695. // proto that wraps image's "ID" : "<id_value>" followed by a list of
  696. // zero or more AnnotationPayload protos (called annotations), which
  697. // have classification detail populated.
  698. // If prediction for any image failed (partially or completely), then
  699. // additional `errors_1.jsonl`, `errors_2.jsonl`,..., `errors_N.jsonl`
  700. // files will be created (N depends on total number of failed
  701. // predictions). These files will have a JSON representation of a proto
  702. // that wraps the same "ID" : "<id_value>" but here followed by
  703. // exactly one
  704. //
  705. // [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
  707. // containing only `code` and `message` fields.
  708. //
  709. // * For Image Object Detection:
  710. // In the created directory files `image_object_detection_1.jsonl`,
  711. // `image_object_detection_2.jsonl`,...,`image_object_detection_N.jsonl`
  712. // will be created, where N may be 1, and depends on the
  713. // total number of the successfully predicted images and annotations.
  714. // Each .JSONL file will contain, per line, a JSON representation of a
  715. // proto that wraps image's "ID" : "<id_value>" followed by a list of
  716. // zero or more AnnotationPayload protos (called annotations), which
  717. // have image_object_detection detail populated. A single image will
  718. // be listed only once with all its annotations, and its annotations
  719. // will never be split across files.
  720. // If prediction for any image failed (partially or completely), then
  721. // additional `errors_1.jsonl`, `errors_2.jsonl`,..., `errors_N.jsonl`
  722. // files will be created (N depends on total number of failed
  723. // predictions). These files will have a JSON representation of a proto
  724. // that wraps the same "ID" : "<id_value>" but here followed by
  725. // exactly one
  726. //
  727. // [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
  729. // containing only `code` and `message` fields.
  730. // * For Video Classification:
  731. // In the created directory a video_classification.csv file, and a .JSON
  732. // file per each video classification requested in the input (i.e. each
  733. // line in given CSV(s)), will be created.
  734. //
  735. // The format of video_classification.csv is:
  736. //
  737. // GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END,JSON_FILE_NAME,STATUS
  738. // where:
  739. // GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END = matches 1 to 1
  740. // the prediction input lines (i.e. video_classification.csv has
  741. // precisely the same number of lines as the prediction input had.)
  742. // JSON_FILE_NAME = Name of .JSON file in the output directory, which
  743. // contains prediction responses for the video time segment.
  744. // STATUS = "OK" if prediction completed successfully, or an error code
  745. // with message otherwise. If STATUS is not "OK" then the .JSON file
  746. // for that line may not exist or be empty.
  747. //
  748. // Each .JSON file, assuming STATUS is "OK", will contain a list of
  749. // AnnotationPayload protos in JSON format, which are the predictions
  750. // for the video time segment the file is assigned to in the
  751. // video_classification.csv. All AnnotationPayload protos will have
  752. // video_classification field set, and will be sorted by
  753. // video_classification.type field (note that the returned types are
  754. // governed by `classifaction_types` parameter in
  755. // [PredictService.BatchPredictRequest.params][]).
  756. //
  757. // * For Video Object Tracking:
  758. // In the created directory a video_object_tracking.csv file will be
  759. // created, and multiple files video_object_tracking_1.json,
  760. // video_object_tracking_2.json,..., video_object_tracking_N.json,
  761. // where N is the number of requests in the input (i.e. the number of
  762. // lines in given CSV(s)).
  763. //
  764. // The format of video_object_tracking.csv is:
  765. //
  766. // GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END,JSON_FILE_NAME,STATUS
  767. // where:
  768. // GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END = matches 1 to 1
  769. // the prediction input lines (i.e. video_object_tracking.csv has
  770. // precisely the same number of lines as the prediction input had.)
  771. // JSON_FILE_NAME = Name of .JSON file in the output directory, which
  772. // contains prediction responses for the video time segment.
  773. // STATUS = "OK" if prediction completed successfully, or an error
  774. // code with message otherwise. If STATUS is not "OK" then the .JSON
  775. // file for that line may not exist or be empty.
  776. //
  777. // Each .JSON file, assuming STATUS is "OK", will contain a list of
  778. // AnnotationPayload protos in JSON format, which are the predictions
  779. // for each frame of the video time segment the file is assigned to in
  780. // video_object_tracking.csv. All AnnotationPayload protos will have
  781. // video_object_tracking field set.
  782. // * For Text Classification:
  783. // In the created directory files `text_classification_1.jsonl`,
  784. // `text_classification_2.jsonl`,...,`text_classification_N.jsonl`
  785. // will be created, where N may be 1, and depends on the
  786. // total number of inputs and annotations found.
  787. //
  788. // Each .JSONL file will contain, per line, a JSON representation of a
  789. // proto that wraps input text snippet or input text file and a list of
  790. // zero or more AnnotationPayload protos (called annotations), which
  791. // have classification detail populated. A single text snippet or file
  792. // will be listed only once with all its annotations, and its
  793. // annotations will never be split across files.
  794. //
  795. // If prediction for any text snippet or file failed (partially or
  796. // completely), then additional `errors_1.jsonl`, `errors_2.jsonl`,...,
  797. // `errors_N.jsonl` files will be created (N depends on total number of
  798. // failed predictions). These files will have a JSON representation of a
  799. // proto that wraps input text snippet or input text file followed by
  800. // exactly one
  801. //
  802. // [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
  804. // containing only `code` and `message`.
  805. //
  806. // * For Text Sentiment:
  807. // In the created directory files `text_sentiment_1.jsonl`,
  808. // `text_sentiment_2.jsonl`,...,`text_sentiment_N.jsonl`
  809. // will be created, where N may be 1, and depends on the
  810. // total number of inputs and annotations found.
  811. //
  812. // Each .JSONL file will contain, per line, a JSON representation of a
  813. // proto that wraps input text snippet or input text file and a list of
  814. // zero or more AnnotationPayload protos (called annotations), which
  815. // have text_sentiment detail populated. A single text snippet or file
  816. // will be listed only once with all its annotations, and its
  817. // annotations will never be split across files.
  818. //
  819. // If prediction for any text snippet or file failed (partially or
  820. // completely), then additional `errors_1.jsonl`, `errors_2.jsonl`,...,
  821. // `errors_N.jsonl` files will be created (N depends on total number of
  822. // failed predictions). These files will have a JSON representation of a
  823. // proto that wraps input text snippet or input text file followed by
  824. // exactly one
  825. //
  826. // [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
  828. // containing only `code` and `message`.
  829. //
  830. // * For Text Extraction:
  831. // In the created directory files `text_extraction_1.jsonl`,
  832. // `text_extraction_2.jsonl`,...,`text_extraction_N.jsonl`
  833. // will be created, where N may be 1, and depends on the
  834. // total number of inputs and annotations found.
  835. // The contents of these .JSONL file(s) depend on whether the input
  836. // used inline text, or documents.
  837. // If input was inline, then each .JSONL file will contain, per line,
  838. // a JSON representation of a proto that wraps the text snippet's "id"
  839. // given in the request (if specified), followed by the input text snippet,
  840. // and a list of zero or more
  841. // AnnotationPayload protos (called annotations), which have
  842. // text_extraction detail populated. A single text snippet will be
  843. // listed only once with all its annotations, and its annotations will
  844. // never be split across files.
  845. // If input used documents, then each .JSONL file will contain, per
  846. // line, a JSON representation of a proto that wraps the document proto
  847. // given in the request, followed by its OCR-ed representation in the form
  848. // of a text snippet, finally followed by a list of zero or more
  849. // AnnotationPayload protos (called annotations), which have
  850. // text_extraction detail populated and refer, via their indices, to
  851. // the OCR-ed text snippet. A single document (and its text snippet)
  852. // will be listed only once with all its annotations, and its
  853. // annotations will never be split across files.
  854. // If prediction for any text snippet failed (partially or completely),
  855. // then additional `errors_1.jsonl`, `errors_2.jsonl`,...,
  856. // `errors_N.jsonl` files will be created (N depends on total number of
  857. // failed predictions). These files will have a JSON representation of a
  858. // proto that wraps either the "id" : "<id_value>" (in case of inline)
  859. // or the document proto (in case of document) but here followed by
  860. // exactly one
  861. //
  862. // [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
  864. // containing only `code` and `message`.
  865. //
  866. // * For Tables:
  867. // Output depends on whether
  868. //
  869. // [gcs_destination][google.cloud.automl.v1beta1.BatchPredictOutputConfig.gcs_destination]
  870. // or
  871. //
  872. // [bigquery_destination][google.cloud.automl.v1beta1.BatchPredictOutputConfig.bigquery_destination]
  873. // is set (either is allowed).
  874. // GCS case:
  875. // In the created directory files `tables_1.csv`, `tables_2.csv`,...,
  876. // `tables_N.csv` will be created, where N may be 1, and depends on
  877. // the total number of the successfully predicted rows.
  878. // For all CLASSIFICATION
  879. //
  880. // [prediction_type-s][google.cloud.automl.v1beta1.TablesModelMetadata.prediction_type]:
  881. // Each .csv file will contain a header, listing all columns'
  882. //
  883. // [display_name-s][google.cloud.automl.v1beta1.ColumnSpec.display_name]
  884. // given on input followed by M target column names in the format of
  885. //
  886. // "<[target_column_specs][google.cloud.automl.v1beta1.TablesModelMetadata.target_column_spec]
  887. //
  888. // [display_name][google.cloud.automl.v1beta1.ColumnSpec.display_name]>_<target
  889. // value>_score" where M is the number of distinct target values,
  890. // i.e. number of distinct values in the target column of the table
  891. // used to train the model. Subsequent lines will contain the
  892. // respective values of successfully predicted rows, with the last,
  893. // i.e. the target, columns having the corresponding prediction
  894. // [scores][google.cloud.automl.v1beta1.TablesAnnotation.score].
  895. // For REGRESSION and FORECASTING
  896. //
  897. // [prediction_type-s][google.cloud.automl.v1beta1.TablesModelMetadata.prediction_type]:
  898. // Each .csv file will contain a header, listing all columns'
  899. // [display_name-s][google.cloud.automl.v1beta1.ColumnSpec.display_name] given
  900. // on input followed by the predicted target column with name in the
  901. // format of
  902. //
  903. // "predicted_<[target_column_specs][google.cloud.automl.v1beta1.TablesModelMetadata.target_column_spec]
  904. //
  905. // [display_name][google.cloud.automl.v1beta1.ColumnSpec.display_name]>"
  906. // Subsequent lines will contain the respective values of
  907. // successfully predicted rows, with the last, i.e. the target,
  908. // column having the predicted target value.
  909. // If prediction for any rows failed, then an additional
  910. // `errors_1.csv`, `errors_2.csv`,..., `errors_N.csv` will be
  911. // created (N depends on total number of failed rows). These files
  912. // will have analogous format as `tables_*.csv`, but always with a
  913. // single target column having
  914. //
  915. // [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
  917. // represented as a JSON string, and containing only `code` and
  918. // `message`.
  919. // BigQuery case:
  920. //
  921. // [bigquery_destination][google.cloud.automl.v1beta1.OutputConfig.bigquery_destination]
  922. // pointing to a BigQuery project must be set. In the given project a
  923. // new dataset will be created with name
  924. // `prediction_<model-display-name>_<timestamp-of-prediction-call>`
  925. // where <model-display-name> will be made
  926. // BigQuery-dataset-name compatible (e.g. most special characters will
  927. // become underscores), and timestamp will be in
  928. // YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset
  929. // two tables will be created, `predictions`, and `errors`.
  930. // The `predictions` table's column names will be the input columns'
  931. //
  932. // [display_name-s][google.cloud.automl.v1beta1.ColumnSpec.display_name]
  933. // followed by the target column with name in the format of
  934. //
  935. // "predicted_<[target_column_specs][google.cloud.automl.v1beta1.TablesModelMetadata.target_column_spec]
  936. //
  937. // [display_name][google.cloud.automl.v1beta1.ColumnSpec.display_name]>"
  938. // The input feature columns will contain the respective values of
  939. // successfully predicted rows, with the target column having an
  940. // ARRAY of
  941. //
  942. // [AnnotationPayloads][google.cloud.automl.v1beta1.AnnotationPayload],
  943. // represented as STRUCT-s, containing
  944. // [TablesAnnotation][google.cloud.automl.v1beta1.TablesAnnotation].
  945. // The `errors` table contains rows for which the prediction has
  946. // failed; it has analogous input columns while the target column name
  947. // is in the format of
  948. //
  949. // "errors_<[target_column_specs][google.cloud.automl.v1beta1.TablesModelMetadata.target_column_spec]
  950. //
  951. // [display_name][google.cloud.automl.v1beta1.ColumnSpec.display_name]>",
  952. // and as a value has
  953. //
  954. // [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
  956. // represented as a STRUCT, and containing only `code` and `message`.
  957. message BatchPredictOutputConfig {
  958. // Required. The destination of the output.
  959. oneof destination {
  960. // The Google Cloud Storage location of the directory where the output is to
  961. // be written to.
  962. GcsDestination gcs_destination = 1;
  963. // The BigQuery location where the output is to be written to.
  964. BigQueryDestination bigquery_destination = 2;
  965. }
  966. }
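
A minimal Python sketch (illustration only) of reading back an Image Classification batch prediction output directory laid out as described above. The local directory name and the exact JSON key spellings ("ID", "annotations", the error field) are assumptions to verify against real output.

    import glob
    import json

    results, failures = {}, {}

    # Successful predictions: one JSON object per line, keyed by the image "ID".
    for path in glob.glob("prediction-out/image_classification_*.jsonl"):
        with open(path, encoding="utf-8") as f:
            for line in f:
                record = json.loads(line)
                results[record.get("ID")] = record.get("annotations", [])

    # Failed predictions: same "ID" followed by a google.rpc.Status (code, message).
    for path in glob.glob("prediction-out/errors_*.jsonl"):
        with open(path, encoding="utf-8") as f:
            for line in f:
                record = json.loads(line)
                failures[record.get("ID")] = record.get("error")  # key name assumed

    print(f"{len(results)} predicted, {len(failures)} failed")
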
  967. // Output configuration for ModelExport Action.
  968. message ModelExportOutputConfig {
  969. // Required. The destination of the output.
  970. oneof destination {
  971. // The Google Cloud Storage location where the model is to be written to.
  972. // This location may only be set for the following model formats:
  973. // "tflite", "edgetpu_tflite", "tf_saved_model", "tf_js", "core_ml".
  974. //
  975. // Under the directory given as the destination a new one with name
  976. // "model-export-<model-display-name>-<timestamp-of-export-call>",
  977. // where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format,
  978. // will be created. Inside the model and any of its supporting files
  979. // will be written.
  980. GcsDestination gcs_destination = 1;
  981. // The GCR location where the model image is to be pushed. This location
  982. // may only be set for the following model formats:
  983. // "docker".
  984. //
  985. // The model image will be created under the given URI.
  986. GcrDestination gcr_destination = 3;
  987. }
  988. // The format in which the model must be exported. The available, and default,
  989. // formats depend on the problem and model type (if a given problem and type
  990. // combination doesn't have a format listed, it means its models are not
  991. // exportable):
  992. //
  993. // * For Image Classification mobile-low-latency-1, mobile-versatile-1,
  994. // mobile-high-accuracy-1:
  995. // "tflite" (default), "edgetpu_tflite", "tf_saved_model", "tf_js",
  996. // "docker".
  997. //
  998. // * For Image Classification mobile-core-ml-low-latency-1,
  999. // mobile-core-ml-versatile-1, mobile-core-ml-high-accuracy-1:
  1000. // "core_ml" (default).
  1001. //
  1002. // * For Image Object Detection mobile-low-latency-1, mobile-versatile-1,
  1003. // mobile-high-accuracy-1:
  1004. // "tflite", "tf_saved_model", "tf_js".
  1005. //
  1006. // * For Video Classification cloud,
  1007. // "tf_saved_model".
  1008. //
  1009. // * For Video Object Tracking cloud,
  1010. // "tf_saved_model".
  1011. //
  1012. // * For Video Object Tracking mobile-versatile-1:
  1013. // "tflite", "edgetpu_tflite", "tf_saved_model", "docker".
  1014. //
  1015. // * For Video Object Tracking mobile-coral-versatile-1:
  1016. // "tflite", "edgetpu_tflite", "docker".
  1017. //
  1018. // * For Video Object Tracking mobile-coral-low-latency-1:
  1019. // "tflite", "edgetpu_tflite", "docker".
  1020. //
  1021. // * For Video Object Tracking mobile-jetson-versatile-1:
  1022. // "tf_saved_model", "docker".
  1023. //
  1024. // * For Tables:
  1025. // "docker".
  1026. //
  1027. // Formats description:
  1028. //
  1029. // * tflite - Used for Android mobile devices.
  1030. // * edgetpu_tflite - Used for [Edge TPU](https://cloud.google.com/edge-tpu/)
  1031. // devices.
  1032. // * tf_saved_model - A tensorflow model in SavedModel format.
  1033. // * tf_js - A [TensorFlow.js](https://www.tensorflow.org/js) model that can
  1034. // be used in the browser and in Node.js using JavaScript.
  1035. // * docker - Used for Docker containers. Use the params field to customize
  1036. // the container. The container is verified to work correctly on
  1037. // the Ubuntu 16.04 operating system. See more at the
  1038. // [containers quickstart](https://cloud.google.com/vision/automl/docs/containers-gcs-quickstart).
  1042. // * core_ml - Used for iOS mobile devices.
  1043. string model_format = 4;
  1044. // Additional model-type and format specific parameters describing the
  1045. // requirements for the model files to be exported; any string must be up to
  1046. // 25000 characters long.
  1047. //
  1048. // * For `docker` format:
  1049. // `cpu_architecture` - (string) "x86_64" (default).
  1050. // `gpu_architecture` - (string) "none" (default), "nvidia".
  1051. map<string, string> params = 2;
  1052. }
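
For illustration only, the JSON form of a ModelExportOutputConfig for a hypothetical "docker" export; the field names mirror the message above, while the GCR path and project are made up.

    # "docker" exports go to a Container Registry location (gcr_destination),
    # not to GCS, per the destination comments above.
    model_export_output_config = {
        "gcr_destination": {"output_uri": "gcr.io/my-project/my-automl-model"},
        "model_format": "docker",
        "params": {
            "cpu_architecture": "x86_64",  # default per the comment above
            "gpu_architecture": "none",    # or "nvidia"
        },
    }
    print(model_export_output_config)
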
  1053. // Output configuration for ExportEvaluatedExamples Action. Note that this call
  1054. // is available only for 30 days after the moment the model was evaluated.
  1055. // The output depends on the domain, as follows (note that only examples from
  1056. // the TEST set are exported):
  1057. //
  1058. // * For Tables:
  1059. //
  1060. // [bigquery_destination][google.cloud.automl.v1beta1.OutputConfig.bigquery_destination]
  1061. // pointing to a BigQuery project must be set. In the given project a
  1062. // new dataset will be created with name
  1063. //
  1064. // `export_evaluated_examples_<model-display-name>_<timestamp-of-export-call>`
  1065. // where <model-display-name> will be made BigQuery-dataset-name
  1066. // compatible (e.g. most special characters will become underscores),
  1067. // and timestamp will be in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601"
  1068. // format. In the dataset an `evaluated_examples` table will be
  1069. // created. It will have all the same columns as the
  1070. //
  1071. // [primary_table][google.cloud.automl.v1beta1.TablesDatasetMetadata.primary_table_spec_id]
  1072. // of the
  1073. // [dataset][google.cloud.automl.v1beta1.Model.dataset_id] from which
  1074. // the model was created, as they were at the moment of model's
  1075. // evaluation (this includes the target column with its ground
  1076. // truth), followed by a column called "predicted_<target_column>". That
  1077. // last column will contain the model's prediction result for each
  1078. // respective row, given as ARRAY of
  1079. // [AnnotationPayloads][google.cloud.automl.v1beta1.AnnotationPayload],
  1080. // represented as STRUCT-s, containing
  1081. // [TablesAnnotation][google.cloud.automl.v1beta1.TablesAnnotation].
  1082. message ExportEvaluatedExamplesOutputConfig {
  1083. // Required. The destination of the output.
  1084. oneof destination {
  1085. // The BigQuery location where the output is to be written to.
  1086. BigQueryDestination bigquery_destination = 2;
  1087. }
  1088. }
  1089. // The Google Cloud Storage location for the input content.
  1090. message GcsSource {
  1091. // Required. Google Cloud Storage URIs to input files, up to 2000 characters
  1092. // long. Accepted forms:
  1093. // * Full object path, e.g. gs://bucket/directory/object.csv
  1094. repeated string input_uris = 1;
  1095. }
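
A minimal Python sketch (illustration only) that checks a GcsSource.input_uris list against the constraints stated above; it deliberately checks nothing beyond the documented path form and length limit.

    def validate_gcs_input_uris(uris):
        """Check GcsSource.input_uris against the comment above."""
        for uri in uris:
            if not uri.startswith("gs://"):
                raise ValueError(f"not a Google Cloud Storage path: {uri}")
            if len(uri) > 2000:
                raise ValueError(f"URI longer than 2000 characters: {uri[:40]}...")
        return True

    validate_gcs_input_uris(["gs://bucket/directory/object.csv"])
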
  1096. // The BigQuery location for the input content.
  1097. message BigQuerySource {
  1098. // Required. BigQuery URI to a table, up to 2000 characters long.
  1099. // Accepted forms:
  1100. // * BigQuery path e.g. bq://projectId.bqDatasetId.bqTableId
  1101. string input_uri = 1;
  1102. }
  1103. // The Google Cloud Storage location where the output is to be written to.
  1104. message GcsDestination {
  1105. // Required. Google Cloud Storage URI to output directory, up to 2000
  1106. // characters long.
  1107. // Accepted forms:
  1108. // * Prefix path: gs://bucket/directory
  1109. // The requesting user must have write permission to the bucket.
  1110. // The directory is created if it doesn't exist.
  1111. string output_uri_prefix = 1;
  1112. }
  1113. // The BigQuery location for the output content.
  1114. message BigQueryDestination {
  1115. // Required. BigQuery URI to a project, up to 2000 characters long.
  1116. // Accepted forms:
  1117. // * BigQuery path e.g. bq://projectId
  1118. string output_uri = 1;
  1119. }
  1120. // The GCR location where the image must be pushed to.
  1121. message GcrDestination {
  1122. // Required. Google Container Registry URI of the new image, up to 2000
  1123. // characters long. See
  1124. //
  1125. // https://cloud.google.com/container-registry/docs/pushing-and-pulling#pushing_an_image_to_a_registry
  1128. // Accepted forms:
  1129. // * [HOSTNAME]/[PROJECT-ID]/[IMAGE]
  1130. // * [HOSTNAME]/[PROJECT-ID]/[IMAGE]:[TAG]
  1131. //
  1132. // The requesting user must have permission to push images to the project.
  1133. string output_uri = 1;
  1134. }
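
A minimal Python sketch (illustration only) that checks a GcrDestination.output_uri against the accepted forms listed above; the regular expression is an approximation, not an official grammar.

    import re

    # Accepted forms per the comment above:
    #   [HOSTNAME]/[PROJECT-ID]/[IMAGE]  or  [HOSTNAME]/[PROJECT-ID]/[IMAGE]:[TAG]
    _GCR_URI = re.compile(r"^[^/\s]+/[^/\s]+/[^/:\s]+(?::[^/\s]+)?$")

    def is_valid_gcr_destination(uri: str) -> bool:
        return len(uri) <= 2000 and bool(_GCR_URI.match(uri))

    assert is_valid_gcr_destination("gcr.io/my-project/my-image:latest")
    assert not is_valid_gcr_destination("gcr.io/my-project")
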