// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataproc.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc";
option java_multiple_files = true;
option java_outer_classname = "JobsProto";
option java_package = "com.google.cloud.dataproc.v1";

// The JobController provides methods to manage jobs.
service JobController {
  option (google.api.default_host) = "dataproc.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Submits a job to a cluster.
  rpc SubmitJob(SubmitJobRequest) returns (Job) {
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/jobs:submit"
      body: "*"
    };
    option (google.api.method_signature) = "project_id,region,job";
  }

  // Submits a job to a cluster.
  rpc SubmitJobAsOperation(SubmitJobRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/jobs:submitAsOperation"
      body: "*"
    };
    option (google.api.method_signature) = "project_id,region,job";
    option (google.longrunning.operation_info) = {
      response_type: "Job"
      metadata_type: "JobMetadata"
    };
  }

  // Gets the resource representation for a job in a project.
  rpc GetJob(GetJobRequest) returns (Job) {
    option (google.api.http) = {
      get: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}"
    };
    option (google.api.method_signature) = "project_id,region,job_id";
  }

  // Lists regions/{region}/jobs in a project.
  rpc ListJobs(ListJobsRequest) returns (ListJobsResponse) {
    option (google.api.http) = {
      get: "/v1/projects/{project_id}/regions/{region}/jobs"
    };
    option (google.api.method_signature) = "project_id,region";
    option (google.api.method_signature) = "project_id,region,filter";
  }

  // Updates a job in a project.
  rpc UpdateJob(UpdateJobRequest) returns (Job) {
    option (google.api.http) = {
      patch: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}"
      body: "job"
    };
  }

  // Starts a job cancellation request. To access the job resource
  // after cancellation, call
  // [regions/{region}/jobs.list](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs/list)
  // or
  // [regions/{region}/jobs.get](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs/get).
  rpc CancelJob(CancelJobRequest) returns (Job) {
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}:cancel"
      body: "*"
    };
    option (google.api.method_signature) = "project_id,region,job_id";
  }

  // Deletes the job from the project. If the job is active, the delete fails,
  // and the response returns `FAILED_PRECONDITION`.
  rpc DeleteJob(DeleteJobRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}"
    };
    option (google.api.method_signature) = "project_id,region,job_id";
  }
}

// The runtime logging config of the job.
message LoggingConfig {
  // The Log4j level for job execution. When running an
  // [Apache Hive](http://hive.apache.org/) job, Cloud
  // Dataproc configures the Hive client to an equivalent verbosity level.
  enum Level {
    // Level is unspecified. Use default level for log4j.
    LEVEL_UNSPECIFIED = 0;

    // Use ALL level for log4j.
    ALL = 1;

    // Use TRACE level for log4j.
    TRACE = 2;

    // Use DEBUG level for log4j.
    DEBUG = 3;

    // Use INFO level for log4j.
    INFO = 4;

    // Use WARN level for log4j.
    WARN = 5;

    // Use ERROR level for log4j.
    ERROR = 6;

    // Use FATAL level for log4j.
    FATAL = 7;

    // Turn off log4j.
    OFF = 8;
  }

  // The per-package log levels for the driver. This may include the
  // "root" package name to configure the rootLogger.
  // Examples:
  //   'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
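  //
  // For example, a sketch of how this map might appear in a REST request
  // body (the package names and levels here are illustrative only):
  //
  //     "loggingConfig": {
  //       "driverLogLevels": {
  //         "root": "INFO",
  //         "org.apache.hadoop": "DEBUG"
  //       }
  //     }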
  map<string, Level> driver_log_levels = 2;
}

// A Dataproc job for running
// [Apache Hadoop
// MapReduce](https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html)
// jobs on [Apache Hadoop
// YARN](https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/YARN.html).
message HadoopJob {
  // Required. Indicates the location of the driver's main class. Specify
  // either the jar file that contains the main class or the main class name.
  // To specify both, add the jar file to `jar_file_uris`, and then specify
  // the main class name in this property.
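  //
  // As an illustration, a minimal `hadoopJob` stanza in a REST request body
  // might look like the following (the bucket, jar, and path names are
  // examples only):
  //
  //     "hadoopJob": {
  //       "mainJarFileUri": "gs://example-bucket/wordcount.jar",
  //       "args": ["gs://example-bucket/input/", "gs://example-bucket/output/"]
  //     }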
  oneof driver {
    // The HCFS URI of the jar file containing the main class.
    // Examples:
    //   'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar'
    //   'hdfs:/tmp/test-samples/custom-wordcount.jar'
    //   'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
    string main_jar_file_uri = 1;

    // The name of the driver's main class. The jar file containing the class
    // must be in the default CLASSPATH or specified in `jar_file_uris`.
    string main_class = 2;
  }

  // Optional. The arguments to pass to the driver. Do not
  // include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as
  // job properties, since a collision may occur that causes an incorrect job
  // submission.
  repeated string args = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Jar file URIs to add to the CLASSPATHs of the
  // Hadoop driver and tasks.
  repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS (Hadoop Compatible Filesystem) URIs of files to be copied
  // to the working directory of Hadoop drivers and distributed tasks. Useful
  // for naively parallel tasks.
  repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted in the working directory
  // of Hadoop drivers and tasks. Supported file types:
  // .jar, .tar, .tar.gz, .tgz, or .zip.
  repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names to values, used to configure Hadoop.
  // Properties that conflict with values set by the Dataproc API may be
  // overwritten. Can include properties set in /etc/hadoop/conf/*-site and
  // classes in user code.
  map<string, string> properties = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The runtime log config for job execution.
  LoggingConfig logging_config = 8 [(google.api.field_behavior) = OPTIONAL];
}

// A Dataproc job for running [Apache Spark](http://spark.apache.org/)
// applications on YARN.
message SparkJob {
  // Required. The specification of the main method to call to drive the job.
  // Specify either the jar file that contains the main class or the main class
  // name. To pass both a main jar and a main class in that jar, add the jar to
  // `CommonJob.jar_file_uris`, and then specify the main class name in
  // `main_class`.
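  //
  // For example, a sketch of a `sparkJob` stanza that names a main class
  // (the class name and jar path are illustrative only):
  //
  //     "sparkJob": {
  //       "mainClass": "org.example.SparkPi",
  //       "jarFileUris": ["gs://example-bucket/spark-pi.jar"],
  //       "args": ["1000"]
  //     }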
  oneof driver {
    // The HCFS URI of the jar file that contains the main class.
    string main_jar_file_uri = 1;

    // The name of the driver's main class. The jar file that contains the
    // class must be in the default CLASSPATH or specified in `jar_file_uris`.
    string main_class = 2;
  }

  // Optional. The arguments to pass to the driver. Do not include arguments,
  // such as `--conf`, that can be set as job properties, since a collision may
  // occur that causes an incorrect job submission.
  repeated string args = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to add to the CLASSPATHs of the
  // Spark driver and tasks.
  repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor. Useful for naively parallel tasks.
  repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // .jar, .tar, .tar.gz, .tgz, and .zip.
  repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names to values, used to configure Spark.
  // Properties that conflict with values set by the Dataproc API may be
  // overwritten. Can include properties set in
  // /etc/spark/conf/spark-defaults.conf and classes in user code.
  map<string, string> properties = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The runtime log config for job execution.
  LoggingConfig logging_config = 8 [(google.api.field_behavior) = OPTIONAL];
}

// A Dataproc job for running
// [Apache
// PySpark](https://spark.apache.org/docs/0.9.0/python-programming-guide.html)
// applications on YARN.
message PySparkJob {
  // Required. The HCFS URI of the main Python file to use as the driver. Must
  // be a .py file.
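  //
  // For example (the URI and arguments are illustrative only):
  //
  //     "pysparkJob": {
  //       "mainPythonFileUri": "gs://example-bucket/jobs/analyze.py",
  //       "args": ["--date", "2020-01-01"]
  //     }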
  string main_python_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The arguments to pass to the driver. Do not include arguments,
  // such as `--conf`, that can be set as job properties, since a collision may
  // occur that causes an incorrect job submission.
  repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS file URIs of Python files to pass to the PySpark
  // framework. Supported file types: .py, .egg, and .zip.
  repeated string python_file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to add to the CLASSPATHs of the
  // Python driver and tasks.
  repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor. Useful for naively parallel tasks.
  repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // .jar, .tar, .tar.gz, .tgz, and .zip.
  repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names to values, used to configure
  // PySpark. Properties that conflict with values set by the Dataproc API may
  // be overwritten. Can include properties set in
  // /etc/spark/conf/spark-defaults.conf and classes in user code.
  map<string, string> properties = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The runtime log config for job execution.
  LoggingConfig logging_config = 8 [(google.api.field_behavior) = OPTIONAL];
}

// A list of queries to run on a cluster.
message QueryList {
  // Required. The queries to execute. You do not need to end a query
  // expression with a semicolon. Multiple queries can be specified in one
  // string by separating each with a semicolon. Here is an example of a
  // Dataproc API snippet that uses a QueryList to specify a HiveJob:
  //
  //     "hiveJob": {
  //       "queryList": {
  //         "queries": [
  //           "query1",
  //           "query2",
  //           "query3;query4",
  //         ]
  //       }
  //     }
  repeated string queries = 1 [(google.api.field_behavior) = REQUIRED];
}

// A Dataproc job for running [Apache Hive](https://hive.apache.org/)
// queries on YARN.
message HiveJob {
  // Required. The sequence of Hive queries to execute, specified as either
  // an HCFS file URI or a list of queries.
  oneof queries {
    // The HCFS URI of the script that contains Hive queries.
    string query_file_uri = 1;

    // A list of queries.
    QueryList query_list = 2;
  }

  // Optional. Whether to continue executing queries if a query fails.
  // The default value is `false`. Setting to `true` can be useful when
  // executing independent parallel queries.
  bool continue_on_failure = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Mapping of query variable names to values (equivalent to the
  // Hive command: `SET name="value";`).
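  //
  // For example (the variable names and values are illustrative only):
  //
  //     "scriptVariables": {
  //       "database": "analytics",
  //       "run_date": "2020-01-01"
  //     }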
  map<string, string> script_variables = 4
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names and values, used to configure Hive.
  // Properties that conflict with values set by the Dataproc API may be
  // overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml,
  // /etc/hive/conf/hive-site.xml, and classes in user code.
  map<string, string> properties = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to add to the CLASSPATH of the
  // Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes
  // and UDFs.
  repeated string jar_file_uris = 6 [(google.api.field_behavior) = OPTIONAL];
}

// A Dataproc job for running [Apache Spark
// SQL](http://spark.apache.org/sql/) queries.
message SparkSqlJob {
  // Required. The sequence of Spark SQL queries to execute, specified as
  // either an HCFS file URI or as a list of queries.
  oneof queries {
    // The HCFS URI of the script that contains SQL queries.
    string query_file_uri = 1;

    // A list of queries.
    QueryList query_list = 2;
  }

  // Optional. Mapping of query variable names to values (equivalent to the
  // Spark SQL command: SET `name="value";`).
  map<string, string> script_variables = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names to values, used to configure
  // Spark SQL's SparkConf. Properties that conflict with values set by the
  // Dataproc API may be overwritten.
  map<string, string> properties = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to be added to the Spark CLASSPATH.
  repeated string jar_file_uris = 56 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The runtime log config for job execution.
  LoggingConfig logging_config = 6 [(google.api.field_behavior) = OPTIONAL];
}

// A Dataproc job for running [Apache Pig](https://pig.apache.org/)
// queries on YARN.
message PigJob {
  // Required. The sequence of Pig queries to execute, specified as an HCFS
  // file URI or a list of queries.
  oneof queries {
    // The HCFS URI of the script that contains the Pig queries.
    string query_file_uri = 1;

    // A list of queries.
    QueryList query_list = 2;
  }

  // Optional. Whether to continue executing queries if a query fails.
  // The default value is `false`. Setting to `true` can be useful when
  // executing independent parallel queries.
  bool continue_on_failure = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Mapping of query variable names to values (equivalent to the Pig
  // command: `name=[value]`).
  map<string, string> script_variables = 4
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names to values, used to configure Pig.
  // Properties that conflict with values set by the Dataproc API may be
  // overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml,
  // /etc/pig/conf/pig.properties, and classes in user code.
  map<string, string> properties = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to add to the CLASSPATH of
  // the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs.
  repeated string jar_file_uris = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The runtime log config for job execution.
  LoggingConfig logging_config = 7 [(google.api.field_behavior) = OPTIONAL];
}

// A Dataproc job for running
// [Apache SparkR](https://spark.apache.org/docs/latest/sparkr.html)
// applications on YARN.
message SparkRJob {
  // Required. The HCFS URI of the main R file to use as the driver.
  // Must be a .R file.
  string main_r_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The arguments to pass to the driver. Do not include arguments,
  // such as `--conf`, that can be set as job properties, since a collision may
  // occur that causes an incorrect job submission.
  repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor. Useful for naively parallel tasks.
  repeated string file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // .jar, .tar, .tar.gz, .tgz, and .zip.
  repeated string archive_uris = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names to values, used to configure SparkR.
  // Properties that conflict with values set by the Dataproc API may be
  // overwritten. Can include properties set in
  // /etc/spark/conf/spark-defaults.conf and classes in user code.
  map<string, string> properties = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The runtime log config for job execution.
  LoggingConfig logging_config = 6 [(google.api.field_behavior) = OPTIONAL];
}

// A Dataproc job for running [Presto](https://prestosql.io/) queries.
// **IMPORTANT**: The [Dataproc Presto Optional
// Component](https://cloud.google.com/dataproc/docs/concepts/components/presto)
// must be enabled when the cluster is created to submit a Presto job to the
// cluster.
message PrestoJob {
  // Required. The sequence of Presto queries to execute, specified as
  // either an HCFS file URI or as a list of queries.
  oneof queries {
    // The HCFS URI of the script that contains SQL queries.
    string query_file_uri = 1;

    // A list of queries.
    QueryList query_list = 2;
  }

  // Optional. Whether to continue executing queries if a query fails.
  // The default value is `false`. Setting to `true` can be useful when
  // executing independent parallel queries.
  bool continue_on_failure = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The format in which query output will be displayed. See the
  // Presto documentation for supported output formats.
  string output_format = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Presto client tags to attach to this query.
  repeated string client_tags = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names to values. Used to set Presto
  // [session
  // properties](https://prestodb.io/docs/current/sql/set-session.html).
  // Equivalent to using the `--session` flag in the Presto CLI.
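  //
  // For example (the session property names and values shown are
  // illustrative only):
  //
  //     "properties": {
  //       "query_max_run_time": "30m",
  //       "distributed_join": "true"
  //     }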
  map<string, string> properties = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The runtime log config for job execution.
  LoggingConfig logging_config = 7 [(google.api.field_behavior) = OPTIONAL];
}

// Dataproc job config.
message JobPlacement {
  // Required. The name of the cluster where the job will be submitted.
  string cluster_name = 1 [(google.api.field_behavior) = REQUIRED];

  // Output only. A cluster UUID generated by the Dataproc service when
  // the job is submitted.
  string cluster_uuid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Cluster labels to identify a cluster where the job will be
  // submitted.
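  //
  // For example (a hypothetical label key and value):
  //
  //     "clusterLabels": {
  //       "environment": "staging"
  //     }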
  map<string, string> cluster_labels = 3
      [(google.api.field_behavior) = OPTIONAL];
}

// Dataproc job status.
message JobStatus {
  // The job state.
  enum State {
    // The job state is unknown.
    STATE_UNSPECIFIED = 0;

    // The job is pending; it has been submitted, but is not yet running.
    PENDING = 1;

    // Job has been received by the service and completed initial setup;
    // it will soon be submitted to the cluster.
    SETUP_DONE = 8;

    // The job is running on the cluster.
    RUNNING = 2;

    // A CancelJob request has been received, but is pending.
    CANCEL_PENDING = 3;

    // Transient in-flight resources have been canceled, and the request to
    // cancel the running job has been issued to the cluster.
    CANCEL_STARTED = 7;

    // The job cancellation was successful.
    CANCELLED = 4;

    // The job has completed successfully.
    DONE = 5;

    // The job has completed, but encountered an error.
    ERROR = 6;

    // Job attempt has failed. The `details` field contains failure details
    // for this attempt.
    //
    // Applies to restartable jobs only.
    ATTEMPT_FAILURE = 9;
  }

  // The job substate.
  enum Substate {
    // The job substate is unknown.
    UNSPECIFIED = 0;

    // The Job is submitted to the agent.
    //
    // Applies to RUNNING state.
    SUBMITTED = 1;

    // The Job has been received and is awaiting execution (it may be waiting
    // for a condition to be met). See the "details" field for the reason for
    // the delay.
    //
    // Applies to RUNNING state.
    QUEUED = 2;

    // The agent-reported status is out of date, which may be caused by a
    // loss of communication between the agent and Dataproc. If the
    // agent does not send a timely update, the job will fail.
    //
    // Applies to RUNNING state.
    STALE_STATUS = 3;
  }

  // Output only. A state message specifying the overall job state.
  State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Output only. Job state details, such as an error
  // description if the state is <code>ERROR</code>.
  string details = 2 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.field_behavior) = OPTIONAL
  ];

  // Output only. The time when this state was entered.
  google.protobuf.Timestamp state_start_time = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Additional state information, which includes
  // status reported by the agent.
  Substate substate = 7 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Encapsulates the full scoping used to reference a job.
message JobReference {
  // Optional. The ID of the Google Cloud Platform project that the job belongs
  // to. If specified, must match the request project ID.
  string project_id = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The job ID, which must be unique within the project.
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), or hyphens (-). The maximum length is 100 characters.
  //
  // If not specified by the caller, the job ID will be provided by the server.
  string job_id = 2 [(google.api.field_behavior) = OPTIONAL];
}

// A YARN application created by a job. Application information is a subset of
// <code>org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto</code>.
//
// **Beta Feature**: This report is available for testing purposes only. It may
// be changed before final release.
message YarnApplication {
  // The application state, corresponding to
  // <code>YarnProtos.YarnApplicationStateProto</code>.
  enum State {
    // Status is unspecified.
    STATE_UNSPECIFIED = 0;

    // Status is NEW.
    NEW = 1;

    // Status is NEW_SAVING.
    NEW_SAVING = 2;

    // Status is SUBMITTED.
    SUBMITTED = 3;

    // Status is ACCEPTED.
    ACCEPTED = 4;

    // Status is RUNNING.
    RUNNING = 5;

    // Status is FINISHED.
    FINISHED = 6;

    // Status is FAILED.
    FAILED = 7;

    // Status is KILLED.
    KILLED = 8;
  }

  // Required. The application name.
  string name = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The application state.
  State state = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The numerical progress of the application, from 1 to 100.
  float progress = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. The HTTP URL of the ApplicationMaster, HistoryServer, or
  // TimelineServer that provides application-specific information. The URL
  // uses the internal hostname, and requires a proxy server for resolution
  // and, possibly, access.
  string tracking_url = 4 [(google.api.field_behavior) = OPTIONAL];
}

// A Dataproc job resource.
message Job {
  // Optional. The fully qualified reference to the job, which can be used to
  // obtain the equivalent REST path of the job resource. If this property
  // is not specified when a job is created, the server generates a
  // <code>job_id</code>.
  JobReference reference = 1 [(google.api.field_behavior) = OPTIONAL];

  // Required. Job information, including how, when, and where to
  // run the job.
  JobPlacement placement = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The application/framework-specific portion of the job.
  oneof type_job {
    // Optional. Job is a Hadoop job.
    HadoopJob hadoop_job = 3 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Job is a Spark job.
    SparkJob spark_job = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Job is a PySpark job.
    PySparkJob pyspark_job = 5 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Job is a Hive job.
    HiveJob hive_job = 6 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Job is a Pig job.
    PigJob pig_job = 7 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Job is a SparkR job.
    SparkRJob spark_r_job = 21 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Job is a SparkSql job.
    SparkSqlJob spark_sql_job = 12 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Job is a Presto job.
    PrestoJob presto_job = 23 [(google.api.field_behavior) = OPTIONAL];
  }

  // Output only. The job status. Additional application-specific
  // status information may be contained in the <code>type_job</code>
  // and <code>yarn_applications</code> fields.
  JobStatus status = 8 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The previous job status.
  repeated JobStatus status_history = 13
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The collection of YARN applications spun up by this job.
  //
  // **Beta** Feature: This report is available for testing purposes only. It
  // may be changed before final release.
  repeated YarnApplication yarn_applications = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. A URI pointing to the location of the stdout of the job's
  // driver program.
  string driver_output_resource_uri = 17
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. If present, the location of miscellaneous control files
  // which may be used as part of job setup and handling. If not present,
  // control files may be placed in the same location as
  // `driver_output_resource_uri`.
  string driver_control_files_uri = 15
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. The labels to associate with this job.
  // Label **keys** must contain 1 to 63 characters, and must conform to
  // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // Label **values** may be empty, but, if present, must contain 1 to 63
  // characters, and must conform to [RFC
  // 1035](https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be
  // associated with a job.
  map<string, string> labels = 18 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Job scheduling configuration.
  JobScheduling scheduling = 20 [(google.api.field_behavior) = OPTIONAL];

  // Output only. A UUID that uniquely identifies a job within the project
  // over time. This is in contrast to a user-settable reference.job_id that
  // may be reused over time.
  string job_uuid = 22 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Indicates whether the job is completed. If the value is
  // `false`, the job is still in progress. If `true`, the job is completed,
  // and the `status.state` field will indicate if it was successful, failed,
  // or cancelled.
  bool done = 24 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Job scheduling options.
message JobScheduling {
  // Optional. Maximum number of times per hour a driver may be restarted as
  // a result of the driver exiting with a non-zero code before the job is
  // reported failed.
  //
  // A job may be reported as thrashing if the driver exits with a non-zero
  // code 4 times within a 10-minute window.
  //
  // Maximum value is 10.
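  //
  // For example, to allow at most two driver restarts per hour, an
  // illustrative REST JSON stanza would be:
  //
  //     "scheduling": {
  //       "maxFailuresPerHour": 2
  //     }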
  int32 max_failures_per_hour = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Maximum number of times in total a driver may be restarted as a
  // result of the driver exiting with a non-zero code before the job is
  // reported failed.
  // Maximum value is 240.
  int32 max_failures_total = 2 [(google.api.field_behavior) = OPTIONAL];
}

// A request to submit a job.
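//
// An illustrative REST form of this request (the project, region, cluster,
// class, and request ID below are examples only):
//
//     POST /v1/projects/example-project/regions/us-central1/jobs:submit
//     {
//       "job": {
//         "placement": { "clusterName": "example-cluster" },
//         "sparkJob": {
//           "mainClass": "org.example.SparkPi",
//           "args": ["1000"]
//         }
//       },
//       "requestId": "f47ac10b-58cc-4372-a567-0e02b2c3d479"
//     }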
message SubmitJobRequest {
  // Required. The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The job resource.
  Job job = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. A unique id used to identify the request. If the server
  // receives two
  // [SubmitJobRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.SubmitJobRequest)s
  // with the same id, then the second request will be ignored and the
  // first [Job][google.cloud.dataproc.v1.Job] created and stored in the
  // backend is returned.
  //
  // It is recommended to always set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The id must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Job Operation metadata.
message JobMetadata {
  // Output only. The job id.
  string job_id = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Most recent job status.
  JobStatus status = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Operation type.
  string operation_type = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Job submission time.
  google.protobuf.Timestamp start_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// A request to get the resource representation for a job in a project.
message GetJobRequest {
  // Required. The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The job ID.
  string job_id = 2 [(google.api.field_behavior) = REQUIRED];
}

// A request to list jobs in a project.
message ListJobsRequest {
  // A matcher that specifies categories of job states.
  enum JobStateMatcher {
    // Match all jobs, regardless of state.
    ALL = 0;

    // Only match jobs in non-terminal states: PENDING, RUNNING, or
    // CANCEL_PENDING.
    ACTIVE = 1;

    // Only match jobs in terminal states: CANCELLED, DONE, or ERROR.
    NON_ACTIVE = 2;
  }

  // Required. The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 6 [(google.api.field_behavior) = REQUIRED];

  // Optional. The number of results to return in each response.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The page token, returned by a previous call, to request the
  // next page of results.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. If set, the returned jobs list includes only jobs that were
  // submitted to the named cluster.
  string cluster_name = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Specifies enumerated categories of jobs to list.
  // (default = match ALL jobs).
  //
  // If `filter` is provided, `jobStateMatcher` will be ignored.
  JobStateMatcher job_state_matcher = 5
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. A filter constraining the jobs to list. Filters are
  // case-sensitive and have the following syntax:
  //
  //     [field = value] AND [field [= value]] ...
  //
  // where **field** is `status.state` or `labels.[KEY]`, and `[KEY]` is a
  // label key. **value** can be `*` to match all values.
  // `status.state` can be either `ACTIVE` or `NON_ACTIVE`.
  // Only the logical `AND` operator is supported; space-separated items are
  // treated as having an implicit `AND` operator.
  //
  // Example filter:
  //
  //     status.state = ACTIVE AND labels.env = staging AND labels.starred = *
  string filter = 7 [(google.api.field_behavior) = OPTIONAL];
}

// A request to update a job.
message UpdateJobRequest {
  // Required. The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The job ID.
  string job_id = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The changes to the job.
  Job job = 4 [(google.api.field_behavior) = REQUIRED];

  // Required. Specifies the path, relative to <code>Job</code>, of
  // the field to update. For example, to update the labels of a Job the
  // <code>update_mask</code> parameter would be specified as
  // <code>labels</code>, and the `PATCH` request body would specify the new
  // value. <strong>Note:</strong> Currently, <code>labels</code> is the only
  // field that can be updated.
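  //
  // For example, an illustrative PATCH request that updates only labels
  // (the project, region, job ID, and label values are examples only):
  //
  //     PATCH /v1/projects/example-project/regions/us-central1/jobs/example-job?updateMask=labels
  //     {
  //       "labels": { "priority": "high" }
  //     }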
  google.protobuf.FieldMask update_mask = 5
      [(google.api.field_behavior) = REQUIRED];
}

// A list of jobs in a project.
message ListJobsResponse {
  // Output only. Jobs list.
  repeated Job jobs = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. This token is included in the response if there are more results
  // to fetch. To fetch additional results, provide this value as the
  // `page_token` in a subsequent <code>ListJobsRequest</code>.
  string next_page_token = 2 [(google.api.field_behavior) = OPTIONAL];
}

// A request to cancel a job.
message CancelJobRequest {
  // Required. The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The job ID.
  string job_id = 2 [(google.api.field_behavior) = REQUIRED];
}

// A request to delete a job.
message DeleteJobRequest {
  // Required. The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The job ID.
  string job_id = 2 [(google.api.field_behavior) = REQUIRED];
}